diff --git a/Cargo.lock b/Cargo.lock index d9cfda17ad929..aa8154420ae13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4307,7 +4307,6 @@ name = "rustc_monomorphize" version = "0.0.0" dependencies = [ "rustc_abi", - "rustc_ast", "rustc_data_structures", "rustc_errors", "rustc_fluent_macro", @@ -4316,7 +4315,6 @@ dependencies = [ "rustc_middle", "rustc_session", "rustc_span", - "rustc_symbol_mangling", "rustc_target", "serde", "serde_json", diff --git a/compiler/rustc_builtin_macros/src/autodiff.rs b/compiler/rustc_builtin_macros/src/autodiff.rs index c784477833279..8622df657eda9 100644 --- a/compiler/rustc_builtin_macros/src/autodiff.rs +++ b/compiler/rustc_builtin_macros/src/autodiff.rs @@ -16,11 +16,12 @@ mod llvm_enzyme { use rustc_ast::tokenstream::*; use rustc_ast::visit::AssocCtxt::*; use rustc_ast::{ - self as ast, AssocItemKind, BindingMode, ExprKind, FnRetTy, FnSig, Generics, ItemKind, - MetaItemInner, PatKind, QSelf, TyKind, Visibility, + self as ast, AngleBracketedArg, AngleBracketedArgs, AnonConst, AssocItemKind, BindingMode, + FnRetTy, FnSig, GenericArg, GenericArgs, GenericParamKind, Generics, ItemKind, + MetaItemInner, PatKind, Path, PathSegment, TyKind, Visibility, }; use rustc_expand::base::{Annotatable, ExtCtxt}; - use rustc_span::{Ident, Span, Symbol, kw, sym}; + use rustc_span::{Ident, Span, Symbol, sym}; use thin_vec::{ThinVec, thin_vec}; use tracing::{debug, trace}; @@ -180,11 +181,8 @@ mod llvm_enzyme { } /// We expand the autodiff macro to generate a new placeholder function which passes - /// type-checking and can be called by users. The function body of the placeholder function will - /// later be replaced on LLVM-IR level, so the design of the body is less important and for now - /// should just prevent early inlining and optimizations which alter the function signature. - /// The exact signature of the generated function depends on the configuration provided by the - /// user, but here is an example: + /// type-checking and can be called by users. The exact signature of the generated function + /// depends on the configuration provided by the user, but here is an example: /// /// ``` /// #[autodiff(cos_box, Reverse, Duplicated, Active)] @@ -200,14 +198,8 @@ mod llvm_enzyme { /// f32::sin(**x) /// } /// #[rustc_autodiff(Reverse, Duplicated, Active)] - /// #[inline(never)] /// fn cos_box(x: &Box, dx: &mut Box, dret: f32) -> f32 { - /// unsafe { - /// asm!("NOP"); - /// }; - /// ::core::hint::black_box(sin(x)); - /// ::core::hint::black_box((dx, dret)); - /// ::core::hint::black_box(sin(x)) + /// std::intrinsics::enzyme_autodiff(sin::<>, cos_box::<>, (x, dx, dret)) /// } /// ``` /// FIXME(ZuseZ4): Once autodiff is enabled by default, make this a doc comment which is checked @@ -228,16 +220,24 @@ mod llvm_enzyme { // first get information about the annotable item: visibility, signature, name and generic // parameters. // these will be used to generate the differentiated version of the function - let Some((vis, sig, primal, generics)) = (match &item { - Annotatable::Item(iitem) => extract_item_info(iitem), + let Some((vis, sig, primal, generics, impl_of_trait)) = (match &item { + Annotatable::Item(iitem) => { + extract_item_info(iitem).map(|(v, s, p, g)| (v, s, p, g, false)) + } Annotatable::Stmt(stmt) => match &stmt.kind { - ast::StmtKind::Item(iitem) => extract_item_info(iitem), + ast::StmtKind::Item(iitem) => { + extract_item_info(iitem).map(|(v, s, p, g)| (v, s, p, g, false)) + } _ => None, }, - Annotatable::AssocItem(assoc_item, Impl { .. }) => match &assoc_item.kind { - ast::AssocItemKind::Fn(box ast::Fn { sig, ident, generics, .. }) => { - Some((assoc_item.vis.clone(), sig.clone(), ident.clone(), generics.clone())) - } + Annotatable::AssocItem(assoc_item, Impl { of_trait }) => match &assoc_item.kind { + ast::AssocItemKind::Fn(box ast::Fn { sig, ident, generics, .. }) => Some(( + assoc_item.vis.clone(), + sig.clone(), + ident.clone(), + generics.clone(), + *of_trait, + )), _ => None, }, _ => None, @@ -255,7 +255,6 @@ mod llvm_enzyme { }; let has_ret = has_ret(&sig.decl.output); - let sig_span = ecx.with_call_site_ctxt(sig.span); // create TokenStream from vec elemtents: // meta_item doesn't have a .tokens field @@ -324,19 +323,20 @@ mod llvm_enzyme { } let span = ecx.with_def_site_ctxt(expand_span); - let n_active: u32 = x - .input_activity - .iter() - .filter(|a| **a == DiffActivity::Active || **a == DiffActivity::ActiveOnly) - .count() as u32; - let (d_sig, new_args, idents, errored) = gen_enzyme_decl(ecx, &sig, &x, span); + let d_sig = gen_enzyme_decl(ecx, &sig, &x, span); + let d_body = gen_enzyme_body( - ecx, &x, n_active, &sig, &d_sig, primal, &new_args, span, sig_span, idents, errored, + ecx, + &d_sig, + primal, + span, + first_ident(&meta_item_vec[0]), &generics, + impl_of_trait, ); // The first element of it is the name of the function to be generated - let asdf = Box::new(ast::Fn { + let d_fn = Box::new(ast::Fn { defaultness: ast::Defaultness::Final, sig: d_sig, ident: first_ident(&meta_item_vec[0]), @@ -348,28 +348,10 @@ mod llvm_enzyme { let mut rustc_ad_attr = P(ast::NormalAttr::from_ident(Ident::with_dummy_span(sym::rustc_autodiff))); - let ts2: Vec = vec![TokenTree::Token( - Token::new(TokenKind::Ident(sym::never, false.into()), span), - Spacing::Joint, - )]; - let never_arg = ast::DelimArgs { - dspan: DelimSpan::from_single(span), - delim: ast::token::Delimiter::Parenthesis, - tokens: TokenStream::from_iter(ts2), - }; - let inline_item = ast::AttrItem { - unsafety: ast::Safety::Default, - path: ast::Path::from_ident(Ident::with_dummy_span(sym::inline)), - args: ast::AttrArgs::Delimited(never_arg), - tokens: None, - }; - let inline_never_attr = P(ast::NormalAttr { item: inline_item, tokens: None }); let new_id = ecx.sess.psess.attr_id_generator.mk_attr_id(); let attr = outer_normal_attr(&rustc_ad_attr, new_id, span); - let new_id = ecx.sess.psess.attr_id_generator.mk_attr_id(); - let inline_never = outer_normal_attr(&inline_never_attr, new_id, span); - // We're avoid duplicating the attributes `#[rustc_autodiff]` and `#[inline(never)]`. + // We're avoid duplicating the attribute `#[rustc_autodiff]`. fn same_attribute(attr: &ast::AttrKind, item: &ast::AttrKind) -> bool { match (attr, item) { (ast::AttrKind::Normal(a), ast::AttrKind::Normal(b)) => { @@ -388,18 +370,12 @@ mod llvm_enzyme { if !iitem.attrs.iter().any(|a| same_attribute(&a.kind, &attr.kind)) { iitem.attrs.push(attr); } - if !iitem.attrs.iter().any(|a| same_attribute(&a.kind, &inline_never.kind)) { - iitem.attrs.push(inline_never.clone()); - } Annotatable::Item(iitem.clone()) } Annotatable::AssocItem(ref mut assoc_item, i @ Impl { .. }) => { if !assoc_item.attrs.iter().any(|a| same_attribute(&a.kind, &attr.kind)) { assoc_item.attrs.push(attr); } - if !assoc_item.attrs.iter().any(|a| same_attribute(&a.kind, &inline_never.kind)) { - assoc_item.attrs.push(inline_never.clone()); - } Annotatable::AssocItem(assoc_item.clone(), i) } Annotatable::Stmt(ref mut stmt) => { @@ -408,10 +384,6 @@ mod llvm_enzyme { if !iitem.attrs.iter().any(|a| same_attribute(&a.kind, &attr.kind)) { iitem.attrs.push(attr); } - if !iitem.attrs.iter().any(|a| same_attribute(&a.kind, &inline_never.kind)) - { - iitem.attrs.push(inline_never.clone()); - } } _ => unreachable!("stmt kind checked previously"), }; @@ -429,12 +401,13 @@ mod llvm_enzyme { tokens: ts, }); + let new_id = ecx.sess.psess.attr_id_generator.mk_attr_id(); let d_attr = outer_normal_attr(&rustc_ad_attr, new_id, span); let d_annotatable = match &item { Annotatable::AssocItem(_, _) => { - let assoc_item: AssocItemKind = ast::AssocItemKind::Fn(asdf); + let assoc_item: AssocItemKind = ast::AssocItemKind::Fn(d_fn); let d_fn = P(ast::AssocItem { - attrs: thin_vec![d_attr, inline_never], + attrs: thin_vec![d_attr], id: ast::DUMMY_NODE_ID, span, vis, @@ -444,13 +417,13 @@ mod llvm_enzyme { Annotatable::AssocItem(d_fn, Impl { of_trait: false }) } Annotatable::Item(_) => { - let mut d_fn = ecx.item(span, thin_vec![d_attr, inline_never], ItemKind::Fn(asdf)); + let mut d_fn = ecx.item(span, thin_vec![d_attr], ItemKind::Fn(d_fn)); d_fn.vis = vis; Annotatable::Item(d_fn) } Annotatable::Stmt(_) => { - let mut d_fn = ecx.item(span, thin_vec![d_attr, inline_never], ItemKind::Fn(asdf)); + let mut d_fn = ecx.item(span, thin_vec![d_attr], ItemKind::Fn(d_fn)); d_fn.vis = vis; Annotatable::Stmt(P(ast::Stmt { @@ -485,76 +458,94 @@ mod llvm_enzyme { ty } - // Will generate a body of the type: + // Generate `enzyme_autodiff` intrinsic call // ``` - // { - // unsafe { - // asm!("NOP"); - // } - // ::core::hint::black_box(primal(args)); - // ::core::hint::black_box((args, ret)); - // - // } + // std::intrinsics::enzyme_autodiff(source, diff, (args)) // ``` - fn init_body_helper( + fn call_enzyme_autodiff( ecx: &ExtCtxt<'_>, - span: Span, primal: Ident, - new_names: &[String], - sig_span: Span, - new_decl_span: Span, - idents: &[Ident], - errored: bool, + diff: Ident, + span: Span, + d_sig: &FnSig, generics: &Generics, - ) -> (P, P, P, P) { - let blackbox_path = ecx.std_path(&[sym::hint, sym::black_box]); - let noop = ast::InlineAsm { - asm_macro: ast::AsmMacro::Asm, - template: vec![ast::InlineAsmTemplatePiece::String("NOP".into())], - template_strs: Box::new([]), - operands: vec![], - clobber_abis: vec![], - options: ast::InlineAsmOptions::PURE | ast::InlineAsmOptions::NOMEM, - line_spans: vec![], - }; - let noop_expr = ecx.expr_asm(span, P(noop)); - let unsf = ast::BlockCheckMode::Unsafe(ast::UnsafeSource::CompilerGenerated); - let unsf_block = ast::Block { - stmts: thin_vec![ecx.stmt_semi(noop_expr)], - id: ast::DUMMY_NODE_ID, - tokens: None, - rules: unsf, + is_impl: bool, + ) -> rustc_ast::Stmt { + let primal_path_expr = gen_turbofish_expr(ecx, primal, generics, span, is_impl); + let diff_path_expr = gen_turbofish_expr(ecx, diff, generics, span, is_impl); + + let tuple_expr = ecx.expr_tuple( span, - }; - let unsf_expr = ecx.expr_block(P(unsf_block)); - let blackbox_call_expr = ecx.expr_path(ecx.path(span, blackbox_path)); - let primal_call = gen_primal_call(ecx, span, primal, idents, generics); - let black_box_primal_call = ecx.expr_call( - new_decl_span, - blackbox_call_expr.clone(), - thin_vec![primal_call.clone()], + d_sig + .decl + .inputs + .iter() + .map(|arg| match arg.pat.kind { + PatKind::Ident(_, ident, _) => ecx.expr_path(ecx.path_ident(span, ident)), + _ => todo!(), + }) + .collect::>() + .into(), ); - let tup_args = new_names - .iter() - .map(|arg| ecx.expr_path(ecx.path_ident(span, Ident::from_str(arg)))) - .collect(); - let black_box_remaining_args = ecx.expr_call( - sig_span, - blackbox_call_expr.clone(), - thin_vec![ecx.expr_tuple(sig_span, tup_args)], + let enzyme_path_idents = ecx.std_path(&[sym::intrinsics, sym::enzyme_autodiff]); + let enzyme_path = ecx.path(span, enzyme_path_idents); + let call_expr = ecx.expr_call( + span, + ecx.expr_path(enzyme_path), + vec![primal_path_expr, diff_path_expr, tuple_expr].into(), ); - let mut body = ecx.block(span, ThinVec::new()); - body.stmts.push(ecx.stmt_semi(unsf_expr)); + ecx.stmt_expr(call_expr) + } - // This uses primal args which won't be available if we errored before - if !errored { - body.stmts.push(ecx.stmt_semi(black_box_primal_call.clone())); - } - body.stmts.push(ecx.stmt_semi(black_box_remaining_args)); + // Generate turbofish expression from fn name and generics + // Given `foo` and `` params, gen `foo::` + fn gen_turbofish_expr( + ecx: &ExtCtxt<'_>, + ident: Ident, + generics: &Generics, + span: Span, + is_impl: bool, + ) -> P { + let generic_args = generics + .params + .iter() + .filter_map(|p| match &p.kind { + GenericParamKind::Type { .. } => { + let path = ast::Path::from_ident(p.ident); + let ty = ecx.ty_path(path); + Some(AngleBracketedArg::Arg(GenericArg::Type(ty))) + } + GenericParamKind::Const { .. } => { + let expr = ecx.expr_path(ast::Path::from_ident(p.ident)); + let anon_const = AnonConst { id: ast::DUMMY_NODE_ID, value: expr }; + Some(AngleBracketedArg::Arg(GenericArg::Const(anon_const))) + } + GenericParamKind::Lifetime { .. } => None, + }) + .collect::>(); + + let args: AngleBracketedArgs = AngleBracketedArgs { span, args: generic_args }; + + let segment = PathSegment { + ident, + id: ast::DUMMY_NODE_ID, + args: Some(P(GenericArgs::AngleBracketed(args))), + }; - (body, primal_call, black_box_primal_call, blackbox_call_expr) + let segments = if is_impl { + thin_vec![ + PathSegment { ident: Ident::from_str("Self"), id: ast::DUMMY_NODE_ID, args: None }, + segment, + ] + } else { + thin_vec![segment] + }; + + let path = Path { span, segments, tokens: None }; + + ecx.expr_path(path) } /// We only want this function to type-check, since we will replace the body @@ -567,202 +558,33 @@ mod llvm_enzyme { /// from optimizing any arguments away. fn gen_enzyme_body( ecx: &ExtCtxt<'_>, - x: &AutoDiffAttrs, - n_active: u32, - sig: &ast::FnSig, d_sig: &ast::FnSig, primal: Ident, - new_names: &[String], span: Span, - sig_span: Span, - idents: Vec, - errored: bool, + diff_ident: Ident, generics: &Generics, + is_impl: bool, ) -> P { let new_decl_span = d_sig.span; - // Just adding some default inline-asm and black_box usages to prevent early inlining - // and optimizations which alter the function signature. - // - // The bb_primal_call is the black_box call of the primal function. We keep it around, - // since it has the convenient property of returning the type of the primal function, - // Remember, we only care to match types here. - // No matter which return we pick, we always wrap it into a std::hint::black_box call, - // to prevent rustc from propagating it into the caller. - let (mut body, primal_call, bb_primal_call, bb_call_expr) = init_body_helper( - ecx, + // Add a call to the primal function to prevent it from being inlined + // and call `enzyme_autodiff` intrinsic (this also covers the return type) + let body = ecx.block( span, - primal, - new_names, - sig_span, - new_decl_span, - &idents, - errored, - generics, + thin_vec![call_enzyme_autodiff( + ecx, + primal, + diff_ident, + new_decl_span, + d_sig, + generics, + is_impl, + )], ); - if !has_ret(&d_sig.decl.output) { - // there is no return type that we have to match, () works fine. - return body; - } - - // Everything from here onwards just tries to fulfil the return type. Fun! - - // having an active-only return means we'll drop the original return type. - // So that can be treated identical to not having one in the first place. - let primal_ret = has_ret(&sig.decl.output) && !x.has_active_only_ret(); - - if primal_ret && n_active == 0 && x.mode.is_rev() { - // We only have the primal ret. - body.stmts.push(ecx.stmt_expr(bb_primal_call)); - return body; - } - - if !primal_ret && n_active == 1 { - // Again no tuple return, so return default float val. - let ty = match d_sig.decl.output { - FnRetTy::Ty(ref ty) => ty.clone(), - FnRetTy::Default(span) => { - panic!("Did not expect Default ret ty: {:?}", span); - } - }; - let arg = ty.kind.is_simple_path().unwrap(); - let tmp = ecx.def_site_path(&[arg, kw::Default]); - let default_call_expr = ecx.expr_path(ecx.path(span, tmp)); - let default_call_expr = ecx.expr_call(new_decl_span, default_call_expr, thin_vec![]); - body.stmts.push(ecx.stmt_expr(default_call_expr)); - return body; - } - - let mut exprs: P = primal_call; - let d_ret_ty = match d_sig.decl.output { - FnRetTy::Ty(ref ty) => ty.clone(), - FnRetTy::Default(span) => { - panic!("Did not expect Default ret ty: {:?}", span); - } - }; - if x.mode.is_fwd() { - // Fwd mode is easy. If the return activity is Const, we support arbitrary types. - // Otherwise, we only support a scalar, a pair of scalars, or an array of scalars. - // We checked that (on a best-effort base) in the preceding gen_enzyme_decl function. - // In all three cases, we can return `std::hint::black_box(::default())`. - if x.ret_activity == DiffActivity::Const { - // Here we call the primal function, since our dummy function has the same return - // type due to the Const return activity. - exprs = ecx.expr_call(new_decl_span, bb_call_expr, thin_vec![exprs]); - } else { - let q = QSelf { ty: d_ret_ty, path_span: span, position: 0 }; - let y = ExprKind::Path( - Some(P(q)), - ecx.path_ident(span, Ident::with_dummy_span(kw::Default)), - ); - let default_call_expr = ecx.expr(span, y); - let default_call_expr = - ecx.expr_call(new_decl_span, default_call_expr, thin_vec![]); - exprs = ecx.expr_call(new_decl_span, bb_call_expr, thin_vec![default_call_expr]); - } - } else if x.mode.is_rev() { - if x.width == 1 { - // We either have `-> ArbitraryType` or `-> (ArbitraryType, repeated_float_scalars)`. - match d_ret_ty.kind { - TyKind::Tup(ref args) => { - // We have a tuple return type. We need to create a tuple of the same size - // and fill it with default values. - let mut exprs2 = thin_vec![exprs]; - for arg in args.iter().skip(1) { - let arg = arg.kind.is_simple_path().unwrap(); - let tmp = ecx.def_site_path(&[arg, kw::Default]); - let default_call_expr = ecx.expr_path(ecx.path(span, tmp)); - let default_call_expr = - ecx.expr_call(new_decl_span, default_call_expr, thin_vec![]); - exprs2.push(default_call_expr); - } - exprs = ecx.expr_tuple(new_decl_span, exprs2); - } - _ => { - // Interestingly, even the `-> ArbitraryType` case - // ends up getting matched and handled correctly above, - // so we don't have to handle any other case for now. - panic!("Unsupported return type: {:?}", d_ret_ty); - } - } - } - exprs = ecx.expr_call(new_decl_span, bb_call_expr, thin_vec![exprs]); - } else { - unreachable!("Unsupported mode: {:?}", x.mode); - } - - body.stmts.push(ecx.stmt_expr(exprs)); - body } - fn gen_primal_call( - ecx: &ExtCtxt<'_>, - span: Span, - primal: Ident, - idents: &[Ident], - generics: &Generics, - ) -> P { - let has_self = idents.len() > 0 && idents[0].name == kw::SelfLower; - - if has_self { - let args: ThinVec<_> = - idents[1..].iter().map(|arg| ecx.expr_path(ecx.path_ident(span, *arg))).collect(); - let self_expr = ecx.expr_self(span); - ecx.expr_method_call(span, self_expr, primal, args) - } else { - let args: ThinVec<_> = - idents.iter().map(|arg| ecx.expr_path(ecx.path_ident(span, *arg))).collect(); - let mut primal_path = ecx.path_ident(span, primal); - - let is_generic = !generics.params.is_empty(); - - match (is_generic, primal_path.segments.last_mut()) { - (true, Some(function_path)) => { - let primal_generic_types = generics - .params - .iter() - .filter(|param| matches!(param.kind, ast::GenericParamKind::Type { .. })); - - let generated_generic_types = primal_generic_types - .map(|type_param| { - let generic_param = TyKind::Path( - None, - ast::Path { - span, - segments: thin_vec![ast::PathSegment { - ident: type_param.ident, - args: None, - id: ast::DUMMY_NODE_ID, - }], - tokens: None, - }, - ); - - ast::AngleBracketedArg::Arg(ast::GenericArg::Type(P(ast::Ty { - id: type_param.id, - span, - kind: generic_param, - tokens: None, - }))) - }) - .collect(); - - function_path.args = - Some(P(ast::GenericArgs::AngleBracketed(ast::AngleBracketedArgs { - span, - args: generated_generic_types, - }))); - } - _ => {} - } - - let primal_call_expr = ecx.expr_path(primal_path); - ecx.expr_call(span, primal_call_expr, args) - } - } - // Generate the new function declaration. Const arguments are kept as is. Duplicated arguments must // be pointers or references. Those receive a shadow argument, which is a mutable reference/pointer. // Active arguments must be scalars. Their shadow argument is added to the return type (and will be @@ -779,7 +601,7 @@ mod llvm_enzyme { sig: &ast::FnSig, x: &AutoDiffAttrs, span: Span, - ) -> (ast::FnSig, Vec, Vec, bool) { + ) -> ast::FnSig { let dcx = ecx.sess.dcx(); let has_ret = has_ret(&sig.decl.output); let sig_args = sig.decl.inputs.len() + if has_ret { 1 } else { 0 }; @@ -791,7 +613,7 @@ mod llvm_enzyme { found: num_activities, }); // This is not the right signature, but we can continue parsing. - return (sig.clone(), vec![], vec![], true); + return sig.clone(); } assert!(sig.decl.inputs.len() == x.input_activity.len()); assert!(has_ret == x.has_ret_activity()); @@ -834,7 +656,7 @@ mod llvm_enzyme { if errors { // This is not the right signature, but we can continue parsing. - return (sig.clone(), new_inputs, idents, true); + return sig.clone(); } let unsafe_activities = x @@ -1042,7 +864,7 @@ mod llvm_enzyme { } let d_sig = FnSig { header: d_header, decl: d_decl, span }; trace!("Generated signature: {:?}", d_sig); - (d_sig, new_inputs, idents, false) + d_sig } } diff --git a/compiler/rustc_codegen_gcc/src/lib.rs b/compiler/rustc_codegen_gcc/src/lib.rs index 613315f77a6b3..9e08d8f297223 100644 --- a/compiler/rustc_codegen_gcc/src/lib.rs +++ b/compiler/rustc_codegen_gcc/src/lib.rs @@ -92,7 +92,6 @@ use gccjit::{CType, Context, OptimizationLevel}; #[cfg(feature = "master")] use gccjit::{TargetInfo, Version}; use rustc_ast::expand::allocator::AllocatorKind; -use rustc_ast::expand::autodiff_attrs::AutoDiffItem; use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule}; use rustc_codegen_ssa::back::write::{ CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn, @@ -362,12 +361,7 @@ impl WriteBackendMethods for GccCodegenBackend { _exported_symbols_for_lto: &[String], each_linked_rlib_for_lto: &[PathBuf], modules: Vec>, - diff_fncs: Vec, ) -> Result, FatalError> { - if !diff_fncs.is_empty() { - unimplemented!(); - } - back::lto::run_fat(cgcx, each_linked_rlib_for_lto, modules) } diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs index 829b3c513c258..e2df3265f6f7d 100644 --- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs +++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs @@ -1,40 +1,92 @@ use std::ptr; -use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, AutoDiffItem, DiffActivity, DiffMode}; -use rustc_codegen_ssa::ModuleCodegen; +use rustc_ast::expand::autodiff_attrs::{AutoDiffAttrs, DiffActivity, DiffMode}; use rustc_codegen_ssa::common::TypeKind; -use rustc_codegen_ssa::traits::BaseTypeCodegenMethods; -use rustc_errors::FatalError; -use rustc_middle::bug; -use tracing::{debug, trace}; +use rustc_codegen_ssa::traits::{BaseTypeCodegenMethods, BuilderMethods}; +use rustc_middle::ty::{PseudoCanonicalInput, Ty, TyCtxt, TypingEnv}; +use rustc_middle::{bug, ty}; +use tracing::debug; -use crate::back::write::llvm_err; -use crate::builder::{SBuilder, UNNAMED}; +use crate::builder::{Builder, PlaceRef, UNNAMED}; use crate::context::SimpleCx; use crate::declare::declare_simple_fn; -use crate::errors::{AutoDiffWithoutEnable, LlvmError}; -use crate::llvm::AttributePlace::Function; -use crate::llvm::{Metadata, True}; +use crate::llvm; +use crate::llvm::{Metadata, True, Type}; use crate::value::Value; -use crate::{CodegenContext, LlvmCodegenBackend, ModuleLlvm, attributes, llvm}; -fn get_params(fnc: &Value) -> Vec<&Value> { - let param_num = llvm::LLVMCountParams(fnc) as usize; - let mut fnc_args: Vec<&Value> = vec![]; - fnc_args.reserve(param_num); - unsafe { - llvm::LLVMGetParams(fnc, fnc_args.as_mut_ptr()); - fnc_args.set_len(param_num); +pub(crate) fn adjust_activity_to_abi<'tcx>( + tcx: TyCtxt<'tcx>, + fn_ty: Ty<'tcx>, + da: &mut Vec, +) { + if !matches!(fn_ty.kind(), ty::FnDef(..)) { + bug!("expected fn def for autodiff, got {:?}", fn_ty); } - fnc_args -} -fn has_sret(fnc: &Value) -> bool { - let num_args = llvm::LLVMCountParams(fnc) as usize; - if num_args == 0 { - false - } else { - unsafe { llvm::LLVMRustHasAttributeAtIndex(fnc, 0, llvm::AttributeKind::StructRet) } + // We don't actually pass the types back into the type system. + // All we do is decide how to handle the arguments. + let sig = fn_ty.fn_sig(tcx).skip_binder(); + + let mut new_activities = vec![]; + let mut new_positions = vec![]; + for (i, ty) in sig.inputs().iter().enumerate() { + if let Some(inner_ty) = ty.builtin_deref(true) { + if inner_ty.is_slice() { + // Now we need to figure out the size of each slice element in memory to allow + // safety checks and usability improvements in the backend. + let sty = match inner_ty.builtin_index() { + Some(sty) => sty, + None => { + panic!("slice element type unknown"); + } + }; + let pci = PseudoCanonicalInput { + typing_env: TypingEnv::fully_monomorphized(), + value: sty, + }; + + let layout = tcx.layout_of(pci); + let elem_size = match layout { + Ok(layout) => layout.size, + Err(_) => { + bug!("autodiff failed to compute slice element size"); + } + }; + let elem_size: u32 = elem_size.bytes() as u32; + + // We know that the length will be passed as extra arg. + if !da.is_empty() { + // We are looking at a slice. The length of that slice will become an + // extra integer on llvm level. Integers are always const. + // However, if the slice get's duplicated, we want to know to later check the + // size. So we mark the new size argument as FakeActivitySize. + // There is one FakeActivitySize per slice, so for convenience we store the + // slice element size in bytes in it. We will use the size in the backend. + let activity = match da[i] { + DiffActivity::DualOnly + | DiffActivity::Dual + | DiffActivity::Dualv + | DiffActivity::DuplicatedOnly + | DiffActivity::Duplicated => { + DiffActivity::FakeActivitySize(Some(elem_size)) + } + DiffActivity::Const => DiffActivity::Const, + _ => bug!("unexpected activity for ptr/ref"), + }; + new_activities.push(activity); + new_positions.push(i + 1); + } + + continue; + } + } + } + // now add the extra activities coming from slices + // Reverse order to not invalidate the indices + for _ in 0..new_activities.len() { + let pos = new_positions.pop().unwrap(); + let activity = new_activities.pop().unwrap(); + da.insert(pos, activity); } } @@ -48,14 +100,13 @@ fn has_sret(fnc: &Value) -> bool { // need to match those. // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it // using iterators and peek()? -fn match_args_from_caller_to_enzyme<'ll>( +fn match_args_from_caller_to_enzyme<'ll, 'tcx>( cx: &SimpleCx<'ll>, - builder: &SBuilder<'ll, 'll>, + builder: &mut Builder<'_, 'll, 'tcx>, width: u32, args: &mut Vec<&'ll llvm::Value>, inputs: &[DiffActivity], outer_args: &[&'ll llvm::Value], - has_sret: bool, ) { debug!("matching autodiff arguments"); // We now handle the issue that Rust level arguments not always match the llvm-ir level @@ -67,14 +118,6 @@ fn match_args_from_caller_to_enzyme<'ll>( let mut outer_pos: usize = 0; let mut activity_pos = 0; - if has_sret { - // Then the first outer arg is the sret pointer. Enzyme doesn't know about sret, so the - // inner function will still return something. We increase our outer_pos by one, - // and once we're done with all other args we will take the return of the inner call and - // update the sret pointer with it - outer_pos = 1; - } - let enzyme_const = cx.create_metadata(b"enzyme_const"); let enzyme_out = cx.create_metadata(b"enzyme_out"); let enzyme_dup = cx.create_metadata(b"enzyme_dup"); @@ -193,92 +236,6 @@ fn match_args_from_caller_to_enzyme<'ll>( } } -// On LLVM-IR, we can luckily declare __enzyme_ functions without specifying the input -// arguments. We do however need to declare them with their correct return type. -// We already figured the correct return type out in our frontend, when generating the outer_fn, -// so we can now just go ahead and use that. This is not always trivial, e.g. because sret. -// Beyond sret, this article describes our challenges nicely: -// -// I.e. (i32, f32) will get merged into i64, but we don't handle that yet. -fn compute_enzyme_fn_ty<'ll>( - cx: &SimpleCx<'ll>, - attrs: &AutoDiffAttrs, - fn_to_diff: &'ll Value, - outer_fn: &'ll Value, -) -> &'ll llvm::Type { - let fn_ty = cx.get_type_of_global(outer_fn); - let mut ret_ty = cx.get_return_type(fn_ty); - - let has_sret = has_sret(outer_fn); - - if has_sret { - // Now we don't just forward the return type, so we have to figure it out based on the - // primal return type, in combination with the autodiff settings. - let fn_ty = cx.get_type_of_global(fn_to_diff); - let inner_ret_ty = cx.get_return_type(fn_ty); - - let void_ty = unsafe { llvm::LLVMVoidTypeInContext(cx.llcx) }; - if inner_ret_ty == void_ty { - // This indicates that even the inner function has an sret. - // Right now I only look for an sret in the outer function. - // This *probably* needs some extra handling, but I never ran - // into such a case. So I'll wait for user reports to have a test case. - bug!("sret in inner function"); - } - - if attrs.width == 1 { - // Enzyme returns a struct of style: - // `{ original_ret(if requested), float, float, ... }` - let mut struct_elements = vec![]; - if attrs.has_primal_ret() { - struct_elements.push(inner_ret_ty); - } - // Next, we push the list of active floats, since they will be lowered to `enzyme_out`, - // and therefore part of the return struct. - let param_tys = cx.func_params_types(fn_ty); - for (act, param_ty) in attrs.input_activity.iter().zip(param_tys) { - if matches!(act, DiffActivity::Active) { - // Now find the float type at position i based on the fn_ty, - // to know what (f16/f32/f64/...) to add to the struct. - struct_elements.push(param_ty); - } - } - ret_ty = cx.type_struct(&struct_elements, false); - } else { - // First we check if we also have to deal with the primal return. - match attrs.mode { - DiffMode::Forward => match attrs.ret_activity { - DiffActivity::Dual => { - let arr_ty = - unsafe { llvm::LLVMArrayType2(inner_ret_ty, attrs.width as u64 + 1) }; - ret_ty = arr_ty; - } - DiffActivity::DualOnly => { - let arr_ty = - unsafe { llvm::LLVMArrayType2(inner_ret_ty, attrs.width as u64) }; - ret_ty = arr_ty; - } - DiffActivity::Const => { - todo!("Not sure, do we need to do something here?"); - } - _ => { - bug!("unreachable"); - } - }, - DiffMode::Reverse => { - todo!("Handle sret for reverse mode"); - } - _ => { - bug!("unreachable"); - } - } - } - } - - // LLVM can figure out the input types on it's own, so we take a shortcut here. - unsafe { llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True) } -} - /// When differentiating `fn_to_diff`, take a `outer_fn` and generate another /// function with expected naming and calling conventions[^1] which will be /// discovered by the enzyme LLVM pass and its body populated with the differentiated @@ -288,11 +245,15 @@ fn compute_enzyme_fn_ty<'ll>( /// [^1]: // FIXME(ZuseZ4): `outer_fn` should include upstream safety checks to // cover some assumptions of enzyme/autodiff, which could lead to UB otherwise. -fn generate_enzyme_call<'ll>( +pub(crate) fn generate_enzyme_call<'ll, 'tcx>( + builder: &mut Builder<'_, 'll, 'tcx>, cx: &SimpleCx<'ll>, fn_to_diff: &'ll Value, - outer_fn: &'ll Value, + outer_name: &str, + ret_ty: &'ll Type, + fn_args: &[&'ll Value], attrs: AutoDiffAttrs, + dest: PlaceRef<'tcx, &'ll Value>, ) { // We have to pick the name depending on whether we want forward or reverse mode autodiff. let mut ad_name: String = match attrs.mode { @@ -302,11 +263,9 @@ fn generate_enzyme_call<'ll>( } .to_string(); - // add outer_fn name to ad_name to make it unique, in case users apply autodiff to multiple + // add outer_name to ad_name to make it unique, in case users apply autodiff to multiple // functions. Unwrap will only panic, if LLVM gave us an invalid string. - let name = llvm::get_value_name(outer_fn); - let outer_fn_name = std::str::from_utf8(&name).unwrap(); - ad_name.push_str(outer_fn_name); + ad_name.push_str(outer_name); // Let us assume the user wrote the following function square: // @@ -316,14 +275,8 @@ fn generate_enzyme_call<'ll>( // %0 = fmul double %x, %x // ret double %0 // } - // ``` - // - // The user now applies autodiff to the function square, in which case fn_to_diff will be `square`. - // Our macro generates the following placeholder code (slightly simplified): // - // ```llvm // define double @dsquare(double %x) { - // ; placeholder code // return 0.0; // } // ``` @@ -340,175 +293,44 @@ fn generate_enzyme_call<'ll>( // ret double %0 // } // ``` - unsafe { - let enzyme_ty = compute_enzyme_fn_ty(cx, &attrs, fn_to_diff, outer_fn); - - // FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and - // think a bit more about what should go here. - let cc = llvm::LLVMGetFunctionCallConv(outer_fn); - let ad_fn = declare_simple_fn( - cx, - &ad_name, - llvm::CallConv::try_from(cc).expect("invalid callconv"), - llvm::UnnamedAddr::No, - llvm::Visibility::Default, - enzyme_ty, - ); - - // Otherwise LLVM might inline our temporary code before the enzyme pass has a chance to - // do it's work. - let attr = llvm::AttributeKind::NoInline.create_attr(cx.llcx); - attributes::apply_to_llfn(ad_fn, Function, &[attr]); - - // We add a made-up attribute just such that we can recognize it after AD to update - // (no)-inline attributes. We'll then also remove this attribute. - let enzyme_marker_attr = llvm::CreateAttrString(cx.llcx, "enzyme_marker"); - attributes::apply_to_llfn(outer_fn, Function, &[enzyme_marker_attr]); - - // first, remove all calls from fnc - let entry = llvm::LLVMGetFirstBasicBlock(outer_fn); - let br = llvm::LLVMRustGetTerminator(entry); - llvm::LLVMRustEraseInstFromParent(br); - - let last_inst = llvm::LLVMRustGetLastInstruction(entry).unwrap(); - let mut builder = SBuilder::build(cx, entry); - - let num_args = llvm::LLVMCountParams(&fn_to_diff); - let mut args = Vec::with_capacity(num_args as usize + 1); - args.push(fn_to_diff); - - let enzyme_primal_ret = cx.create_metadata(b"enzyme_primal_return"); - if matches!(attrs.ret_activity, DiffActivity::Dual | DiffActivity::Active) { - args.push(cx.get_metadata_value(enzyme_primal_ret)); - } - if attrs.width > 1 { - let enzyme_width = cx.create_metadata(b"enzyme_width"); - args.push(cx.get_metadata_value(enzyme_width)); - args.push(cx.get_const_int(cx.type_i64(), attrs.width as u64)); - } - - let has_sret = has_sret(outer_fn); - let outer_args: Vec<&llvm::Value> = get_params(outer_fn); - match_args_from_caller_to_enzyme( - &cx, - &builder, - attrs.width, - &mut args, - &attrs.input_activity, - &outer_args, - has_sret, - ); - - let call = builder.call(enzyme_ty, ad_fn, &args, None); - - // This part is a bit iffy. LLVM requires that a call to an inlineable function has some - // metadata attached to it, but we just created this code oota. Given that the - // differentiated function already has partly confusing metadata, and given that this - // affects nothing but the auttodiff IR, we take a shortcut and just steal metadata from the - // dummy code which we inserted at a higher level. - // FIXME(ZuseZ4): Work with Enzyme core devs to clarify what debug metadata issues we have, - // and how to best improve it for enzyme core and rust-enzyme. - let md_ty = cx.get_md_kind_id("dbg"); - if llvm::LLVMRustHasMetadata(last_inst, md_ty) { - let md = llvm::LLVMRustDIGetInstMetadata(last_inst) - .expect("failed to get instruction metadata"); - let md_todiff = cx.get_metadata_value(md); - llvm::LLVMSetMetadata(call, md_ty, md_todiff); - } else { - // We don't panic, since depending on whether we are in debug or release mode, we might - // have no debug info to copy, which would then be ok. - trace!("no dbg info"); - } - - // Now that we copied the metadata, get rid of dummy code. - llvm::LLVMRustEraseInstUntilInclusive(entry, last_inst); - - if cx.val_ty(call) == cx.type_void() || has_sret { - if has_sret { - // This is what we already have in our outer_fn (shortened): - // define void @_foo(ptr <..> sret([32 x i8]) initializes((0, 32)) %0, <...>) { - // %7 = call [4 x double] (...) @__enzyme_fwddiff_foo(ptr @square, metadata !"enzyme_width", i64 4, <...>) - // - // store [4 x double] %7, ptr %0, align 8 - // ret void - // } - - // now store the result of the enzyme call into the sret pointer. - let sret_ptr = outer_args[0]; - let call_ty = cx.val_ty(call); - if attrs.width == 1 { - assert_eq!(cx.type_kind(call_ty), TypeKind::Struct); - } else { - assert_eq!(cx.type_kind(call_ty), TypeKind::Array); - } - llvm::LLVMBuildStore(&builder.llbuilder, call, sret_ptr); - } - builder.ret_void(); - } else { - builder.ret(call); - } - - // Let's crash in case that we messed something up above and generated invalid IR. - llvm::LLVMRustVerifyFunction( - outer_fn, - llvm::LLVMRustVerifierFailureAction::LLVMAbortProcessAction, - ); - } -} - -pub(crate) fn differentiate<'ll>( - module: &'ll ModuleCodegen, - cgcx: &CodegenContext, - diff_items: Vec, -) -> Result<(), FatalError> { - for item in &diff_items { - trace!("{}", item); + let enzyme_ty = unsafe { llvm::LLVMFunctionType(ret_ty, ptr::null(), 0, True) }; + + // FIXME(ZuseZ4): the CC/Addr/Vis values are best effort guesses, we should look at tests and + // think a bit more about what should go here. + let cc = unsafe { llvm::LLVMGetFunctionCallConv(fn_to_diff) }; + let ad_fn = declare_simple_fn( + cx, + &ad_name, + llvm::CallConv::try_from(cc).expect("invalid callconv"), + llvm::UnnamedAddr::No, + llvm::Visibility::Default, + enzyme_ty, + ); + + let num_args = llvm::LLVMCountParams(&fn_to_diff); + let mut args = Vec::with_capacity(num_args as usize + 1); + args.push(fn_to_diff); + + let enzyme_primal_ret = cx.create_metadata(b"enzyme_primal_return"); + if matches!(attrs.ret_activity, DiffActivity::Dual | DiffActivity::Active) { + args.push(cx.get_metadata_value(enzyme_primal_ret)); } - - let diag_handler = cgcx.create_dcx(); - - let cx = SimpleCx::new(module.module_llvm.llmod(), module.module_llvm.llcx, cgcx.pointer_size); - - // First of all, did the user try to use autodiff without using the -Zautodiff=Enable flag? - if !diff_items.is_empty() - && !cgcx.opts.unstable_opts.autodiff.contains(&rustc_session::config::AutoDiff::Enable) - { - return Err(diag_handler.handle().emit_almost_fatal(AutoDiffWithoutEnable)); - } - - // Here we replace the placeholder code with the actual autodiff code, which calls Enzyme. - for item in diff_items.iter() { - let name = item.source.clone(); - let fn_def: Option<&llvm::Value> = cx.get_function(&name); - let Some(fn_def) = fn_def else { - return Err(llvm_err( - diag_handler.handle(), - LlvmError::PrepareAutoDiff { - src: item.source.clone(), - target: item.target.clone(), - error: "could not find source function".to_owned(), - }, - )); - }; - debug!(?item.target); - let fn_target: Option<&llvm::Value> = cx.get_function(&item.target); - let Some(fn_target) = fn_target else { - return Err(llvm_err( - diag_handler.handle(), - LlvmError::PrepareAutoDiff { - src: item.source.clone(), - target: item.target.clone(), - error: "could not find target function".to_owned(), - }, - )); - }; - - generate_enzyme_call(&cx, fn_def, fn_target, item.attrs.clone()); + if attrs.width > 1 { + let enzyme_width = cx.create_metadata(b"enzyme_width"); + args.push(cx.get_metadata_value(enzyme_width)); + args.push(cx.get_const_int(cx.type_i64(), attrs.width as u64)); } - // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts + match_args_from_caller_to_enzyme( + &cx, + builder, + attrs.width, + &mut args, + &attrs.input_activity, + fn_args, + ); - trace!("done with differentiate()"); + let call = builder.call(enzyme_ty, None, None, ad_fn, &args, None, None); - Ok(()) + builder.store_to_place(call, dest.val); } diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index ee77774c68832..54f45d1649ddd 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -8,7 +8,6 @@ use std::str; use rustc_abi::{HasDataLayout, Size, TargetDataLayout, VariantIdx}; use rustc_codegen_ssa::back::versioned_llvm_target; use rustc_codegen_ssa::base::{wants_msvc_seh, wants_wasm_eh}; -use rustc_codegen_ssa::common::TypeKind; use rustc_codegen_ssa::errors as ssa_errors; use rustc_codegen_ssa::traits::*; use rustc_data_structures::base_n::{ALPHANUMERIC_ONLY, ToBaseN}; @@ -654,10 +653,6 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { } } impl<'ll> SimpleCx<'ll> { - pub(crate) fn get_return_type(&self, ty: &'ll Type) -> &'ll Type { - assert_eq!(self.type_kind(ty), TypeKind::Function); - unsafe { llvm::LLVMGetReturnType(ty) } - } pub(crate) fn get_type_of_global(&self, val: &'ll Value) -> &'ll Type { unsafe { llvm::LLVMGlobalGetValueType(val) } } diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index 7b27e496986ae..b1b55236cee85 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -3,24 +3,29 @@ use std::cmp::Ordering; use rustc_abi::{Align, BackendRepr, ExternAbi, Float, HasDataLayout, Primitive, Size}; use rustc_codegen_ssa::base::{compare_simd_types, wants_msvc_seh, wants_wasm_eh}; +use rustc_codegen_ssa::codegen_attrs::autodiff_attrs; use rustc_codegen_ssa::common::{IntPredicate, TypeKind}; use rustc_codegen_ssa::errors::{ExpectedPointerMutability, InvalidMonomorphization}; use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue}; use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue}; use rustc_codegen_ssa::traits::*; -use rustc_hir as hir; +use rustc_hir::def_id::LOCAL_CRATE; +use rustc_hir::{self as hir}; use rustc_middle::mir::BinOp; use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf}; -use rustc_middle::ty::{self, GenericArgsRef, Ty}; +use rustc_middle::ty::{self, GenericArgsRef, Instance, Ty, TyCtxt, TypingEnv}; use rustc_middle::{bug, span_bug}; use rustc_span::{Span, Symbol, sym}; -use rustc_symbol_mangling::mangle_internal_symbol; +use rustc_symbol_mangling::{mangle_internal_symbol, symbol_name_for_instance_in_crate}; +use rustc_target::callconv::PassMode; use rustc_target::spec::PanicStrategy; use tracing::debug; use crate::abi::FnAbiLlvmExt; use crate::builder::Builder; +use crate::builder::autodiff::{adjust_activity_to_abi, generate_enzyme_call}; use crate::context::CodegenCx; +use crate::errors::AutoDiffWithoutEnable; use crate::llvm::{self, Metadata}; use crate::type_::Type; use crate::type_of::LayoutLlvmExt; @@ -189,6 +194,10 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> { &[ptr, args[1].immediate()], ) } + sym::enzyme_autodiff => { + codegen_enzyme_autodiff(self, tcx, instance, args, result); + return Ok(()); + } sym::is_val_statically_known => { if let OperandValue::Immediate(imm) = args[0].val { self.call_intrinsic( @@ -1113,6 +1122,144 @@ fn get_rust_try_fn<'a, 'll, 'tcx>( rust_try } +fn codegen_enzyme_autodiff<'ll, 'tcx>( + bx: &mut Builder<'_, 'll, 'tcx>, + tcx: TyCtxt<'tcx>, + instance: ty::Instance<'tcx>, + args: &[OperandRef<'tcx, &'ll Value>], + result: PlaceRef<'tcx, &'ll Value>, +) { + if !tcx.sess.opts.unstable_opts.autodiff.contains(&rustc_session::config::AutoDiff::Enable) { + let _ = tcx.dcx().emit_almost_fatal(AutoDiffWithoutEnable); + } + + let fn_args = instance.args; + let callee_ty = instance.ty(tcx, bx.typing_env()); + + let sig = callee_ty.fn_sig(tcx); + let sig = tcx.normalize_erasing_late_bound_regions(bx.typing_env(), sig); + + let ret_ty = sig.output(); + let llret_ty = bx.layout_of(ret_ty).llvm_type(bx); + + // Get source, diff, and attrs + let (source_id, source_args) = match fn_args.into_type_list(tcx)[0].kind() { + ty::FnDef(def_id, source_params) => (def_id, source_params), + _ => bug!("invalid autodiff intrinsic args"), + }; + + let fn_source = match Instance::try_resolve(tcx, bx.cx.typing_env(), *source_id, source_args) { + Ok(Some(instance)) => instance, + Ok(None) => bug!( + "could not resolve ({:?}, {:?}) to a specific autodiff instance", + source_id, + source_args + ), + Err(_) => { + // An error has already been emitted + return; + } + }; + + let source_symbol = symbol_name_for_instance_in_crate(tcx, fn_source.clone(), LOCAL_CRATE); + let Some(fn_to_diff) = bx.cx.get_function(&source_symbol) else { + bug!("could not find source function") + }; + + let (diff_id, diff_args) = match fn_args.into_type_list(tcx)[1].kind() { + ty::FnDef(def_id, diff_args) => (def_id, diff_args), + _ => bug!("invalid args"), + }; + + let fn_diff = match Instance::try_resolve(tcx, bx.cx.typing_env(), *diff_id, diff_args) { + Ok(Some(instance)) => instance, + Ok(None) => bug!( + "could not resolve ({:?}, {:?}) to a specific autodiff instance", + diff_id, + diff_args + ), + Err(_) => { + // An error has already been emitted + return; + } + }; + + let val_arr = get_args_from_tuple(bx, args[2], fn_diff); + let diff_symbol = symbol_name_for_instance_in_crate(tcx, fn_diff.clone(), LOCAL_CRATE); + + let Some(mut diff_attrs) = autodiff_attrs(tcx, fn_diff.def_id()) else { + bug!("could not find autodiff attrs") + }; + + adjust_activity_to_abi( + tcx, + fn_source.ty(tcx, TypingEnv::fully_monomorphized()), + &mut diff_attrs.input_activity, + ); + + // Build body + generate_enzyme_call( + bx, + bx.cx, + fn_to_diff, + &diff_symbol, + llret_ty, + &val_arr, + diff_attrs.clone(), + result, + ); +} + +fn get_args_from_tuple<'ll, 'tcx>( + bx: &mut Builder<'_, 'll, 'tcx>, + tuple_op: OperandRef<'tcx, &'ll Value>, + fn_instance: Instance<'tcx>, +) -> Vec<&'ll Value> { + let cx = bx.cx; + let fn_abi = cx.fn_abi_of_instance(fn_instance, ty::List::empty()); + + match tuple_op.val { + OperandValue::Immediate(val) => vec![val], + OperandValue::Pair(v1, v2) => vec![v1, v2], + OperandValue::Ref(ptr) => { + let tuple_place = PlaceRef { val: ptr, layout: tuple_op.layout }; + + let mut result = Vec::with_capacity(fn_abi.args.len()); + let mut tuple_index = 0; + + for arg in &fn_abi.args { + match arg.mode { + PassMode::Ignore => {} + PassMode::Direct(_) | PassMode::Cast { .. } => { + let field = tuple_place.project_field(bx, tuple_index); + let llvm_ty = field.layout.llvm_type(bx.cx); + let val = bx.load(llvm_ty, field.val.llval, field.val.align); + result.push(val); + tuple_index += 1; + } + PassMode::Pair(_, _) => { + let field = tuple_place.project_field(bx, tuple_index); + let llvm_ty = field.layout.llvm_type(bx.cx); + let pair_val = bx.load(llvm_ty, field.val.llval, field.val.align); + result.push(bx.extract_value(pair_val, 0)); + result.push(bx.extract_value(pair_val, 1)); + tuple_index += 1; + } + PassMode::Indirect { .. } => { + let field = tuple_place.project_field(bx, tuple_index); + result.push(field.val.llval); + tuple_index += 1; + } + } + } + + result + } + + OperandValue::ZeroSized => bug!("unexpected ZeroSized argument in get_args_from_tuple"), + } +} + fn generic_simd_intrinsic<'ll, 'tcx>( bx: &mut Builder<'_, 'll, 'tcx>, name: Symbol, diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index ca84b6de8b11a..79e80db6f5554 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -30,7 +30,6 @@ use context::SimpleCx; use errors::ParseTargetMachineConfig; use llvm_util::target_config; use rustc_ast::expand::allocator::AllocatorKind; -use rustc_ast::expand::autodiff_attrs::AutoDiffItem; use rustc_codegen_ssa::back::lto::{SerializedModule, ThinModule}; use rustc_codegen_ssa::back::write::{ CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryConfig, TargetMachineFactoryFn, @@ -173,15 +172,10 @@ impl WriteBackendMethods for LlvmCodegenBackend { exported_symbols_for_lto: &[String], each_linked_rlib_for_lto: &[PathBuf], modules: Vec>, - diff_fncs: Vec, ) -> Result, FatalError> { let mut module = back::lto::run_fat(cgcx, exported_symbols_for_lto, each_linked_rlib_for_lto, modules)?; - if !diff_fncs.is_empty() { - builder::autodiff::differentiate(&module, cgcx, diff_fncs)?; - } - let dcx = cgcx.create_dcx(); let dcx = dcx.handle(); back::lto::run_pass_manager(cgcx, dcx, &mut module, false)?; diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index aa29afb7f5b11..2e8122798d169 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -7,7 +7,6 @@ use std::{fs, io, mem, str, thread}; use rustc_abi::Size; use rustc_ast::attr; -use rustc_ast::expand::autodiff_attrs::AutoDiffItem; use rustc_data_structures::fx::FxIndexMap; use rustc_data_structures::jobserver::{self, Acquired}; use rustc_data_structures::memmap::Mmap; @@ -38,7 +37,7 @@ use tracing::debug; use super::link::{self, ensure_removed}; use super::lto::{self, SerializedModule}; use crate::back::lto::check_lto_allowed; -use crate::errors::{AutodiffWithoutLto, ErrorCreatingRemarkDir}; +use crate::errors::ErrorCreatingRemarkDir; use crate::traits::*; use crate::{ CachedModuleCodegen, CodegenResults, CompiledModule, CrateInfo, ModuleCodegen, ModuleKind, @@ -454,7 +453,6 @@ pub(crate) fn start_async_codegen( backend: B, tcx: TyCtxt<'_>, target_cpu: String, - autodiff_items: &[AutoDiffItem], ) -> OngoingCodegen { let (coordinator_send, coordinator_receive) = channel(); @@ -473,7 +471,6 @@ pub(crate) fn start_async_codegen( backend.clone(), tcx, &crate_info, - autodiff_items, shared_emitter, codegen_worker_send, coordinator_receive, @@ -728,7 +725,6 @@ pub(crate) enum WorkItem { each_linked_rlib_for_lto: Vec, needs_fat_lto: Vec>, import_only_modules: Vec<(SerializedModule, WorkProduct)>, - autodiff: Vec, }, /// Performs thin-LTO on the given module. ThinLto(lto::ThinModule), @@ -1001,7 +997,6 @@ fn execute_fat_lto_work_item( each_linked_rlib_for_lto: &[PathBuf], mut needs_fat_lto: Vec>, import_only_modules: Vec<(SerializedModule, WorkProduct)>, - autodiff: Vec, module_config: &ModuleConfig, ) -> Result, FatalError> { for (module, wp) in import_only_modules { @@ -1013,7 +1008,6 @@ fn execute_fat_lto_work_item( exported_symbols_for_lto, each_linked_rlib_for_lto, needs_fat_lto, - autodiff, )?; let module = B::codegen(cgcx, module, module_config)?; Ok(WorkItemResult::Finished(module)) @@ -1105,7 +1099,6 @@ fn start_executing_work( backend: B, tcx: TyCtxt<'_>, crate_info: &CrateInfo, - autodiff_items: &[AutoDiffItem], shared_emitter: SharedEmitter, codegen_worker_send: Sender, coordinator_receive: Receiver>, @@ -1115,7 +1108,6 @@ fn start_executing_work( ) -> thread::JoinHandle> { let coordinator_send = tx_to_llvm_workers; let sess = tcx.sess; - let autodiff_items = autodiff_items.to_vec(); let mut each_linked_rlib_for_lto = Vec::new(); let mut each_linked_rlib_file_for_lto = Vec::new(); @@ -1448,7 +1440,6 @@ fn start_executing_work( each_linked_rlib_for_lto: each_linked_rlib_file_for_lto, needs_fat_lto, import_only_modules, - autodiff: autodiff_items.clone(), }, 0, )); @@ -1456,11 +1447,6 @@ fn start_executing_work( helper.request_token(); } } else { - if !autodiff_items.is_empty() { - let dcx = cgcx.create_dcx(); - dcx.handle().emit_fatal(AutodiffWithoutLto {}); - } - for (work, cost) in generate_thin_lto_work( &cgcx, &exported_symbols_for_lto, @@ -1795,7 +1781,6 @@ fn spawn_work<'a, B: ExtraBackendMethods>( each_linked_rlib_for_lto, needs_fat_lto, import_only_modules, - autodiff, } => { let _timer = cgcx .prof @@ -1806,7 +1791,6 @@ fn spawn_work<'a, B: ExtraBackendMethods>( &each_linked_rlib_for_lto, needs_fat_lto, import_only_modules, - autodiff, module_config, ) } diff --git a/compiler/rustc_codegen_ssa/src/base.rs b/compiler/rustc_codegen_ssa/src/base.rs index b4556ced0b3fb..b483c01da59e5 100644 --- a/compiler/rustc_codegen_ssa/src/base.rs +++ b/compiler/rustc_codegen_ssa/src/base.rs @@ -647,7 +647,7 @@ pub fn codegen_crate( ) -> OngoingCodegen { // Skip crate items and just output metadata in -Z no-codegen mode. if tcx.sess.opts.unstable_opts.no_codegen || !tcx.sess.opts.output_types.should_codegen() { - let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu, &[]); + let ongoing_codegen = start_async_codegen(backend, tcx, target_cpu); ongoing_codegen.codegen_finished(tcx); @@ -665,8 +665,7 @@ pub fn codegen_crate( // Run the monomorphization collector and partition the collected items into // codegen units. - let MonoItemPartitions { codegen_units, autodiff_items, .. } = - tcx.collect_and_partition_mono_items(()); + let MonoItemPartitions { codegen_units, .. } = tcx.collect_and_partition_mono_items(()); // Force all codegen_unit queries so they are already either red or green // when compile_codegen_unit accesses them. We are not able to re-execute @@ -679,7 +678,7 @@ pub fn codegen_crate( } } - let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu, autodiff_items); + let ongoing_codegen = start_async_codegen(backend.clone(), tcx, target_cpu); // Codegen an allocator shim, if necessary. if let Some(kind) = allocator_kind_for_codegen(tcx) { diff --git a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs index 7f54a47327af8..cfd314330e881 100644 --- a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs +++ b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs @@ -210,14 +210,6 @@ fn process_builtin_attrs( let mut interesting_spans = InterestingAttributeDiagnosticSpans::default(); let rust_target_features = tcx.rust_target_features(LOCAL_CRATE); - // If our rustc version supports autodiff/enzyme, then we call our handler - // to check for any `#[rustc_autodiff(...)]` attributes. - // FIXME(jdonszelmann): merge with loop below - if cfg!(llvm_enzyme) { - let ad = autodiff_attrs(tcx, did.into()); - codegen_fn_attrs.autodiff_item = ad; - } - for attr in attrs.iter() { if let hir::Attribute::Parsed(p) = attr { match p { @@ -624,7 +616,7 @@ fn inherited_align<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> Option { /// placeholder functions. We wrote the rustc_autodiff attributes ourself, so this should never /// panic, unless we introduced a bug when parsing the autodiff macro. //FIXME(jdonszelmann): put in the main loop. No need to have two..... :/ Let's do that when we make autodiff parsed. -fn autodiff_attrs(tcx: TyCtxt<'_>, id: DefId) -> Option { +pub fn autodiff_attrs(tcx: TyCtxt<'_>, id: DefId) -> Option { let attrs = tcx.get_attrs(id, sym::rustc_autodiff); let attrs = attrs.filter(|attr| attr.has_name(sym::rustc_autodiff)).collect::>(); diff --git a/compiler/rustc_codegen_ssa/src/traits/write.rs b/compiler/rustc_codegen_ssa/src/traits/write.rs index f391c198e1a10..c29ad90735b7b 100644 --- a/compiler/rustc_codegen_ssa/src/traits/write.rs +++ b/compiler/rustc_codegen_ssa/src/traits/write.rs @@ -1,6 +1,5 @@ use std::path::PathBuf; -use rustc_ast::expand::autodiff_attrs::AutoDiffItem; use rustc_errors::{DiagCtxtHandle, FatalError}; use rustc_middle::dep_graph::WorkProduct; @@ -23,7 +22,6 @@ pub trait WriteBackendMethods: Clone + 'static { exported_symbols_for_lto: &[String], each_linked_rlib_for_lto: &[PathBuf], modules: Vec>, - diff_fncs: Vec, ) -> Result, FatalError>; /// Performs thin LTO by performing necessary global analysis and returning two /// lists, one of the modules that need optimization and another for modules that diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs index 6e5fe3823ab51..bca73b2135ccc 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs @@ -135,6 +135,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi | sym::round_ties_even_f32 | sym::round_ties_even_f64 | sym::round_ties_even_f128 + | sym::enzyme_autodiff | sym::const_eval_select => hir::Safety::Safe, _ => hir::Safety::Unsafe, }; @@ -198,6 +199,7 @@ pub(crate) fn check_intrinsic_type( let safety = intrinsic_operation_unsafety(tcx, intrinsic_id); let n_lts = 0; let (n_tps, n_cts, inputs, output) = match intrinsic_name { + sym::enzyme_autodiff => (4, 0, vec![param(0), param(1), param(2)], param(3)), sym::abort => (0, 0, vec![], tcx.types.never), sym::unreachable => (0, 0, vec![], tcx.types.never), sym::breakpoint => (0, 0, vec![], tcx.types.unit), diff --git a/compiler/rustc_middle/src/middle/codegen_fn_attrs.rs b/compiler/rustc_middle/src/middle/codegen_fn_attrs.rs index 94384e64afd15..3b290d90da7a8 100644 --- a/compiler/rustc_middle/src/middle/codegen_fn_attrs.rs +++ b/compiler/rustc_middle/src/middle/codegen_fn_attrs.rs @@ -1,7 +1,6 @@ use std::borrow::Cow; use rustc_abi::Align; -use rustc_ast::expand::autodiff_attrs::AutoDiffAttrs; use rustc_hir::attrs::{InlineAttr, InstructionSetAttr, OptimizeAttr}; use rustc_macros::{HashStable, TyDecodable, TyEncodable}; use rustc_span::Symbol; @@ -75,8 +74,6 @@ pub struct CodegenFnAttrs { /// The `#[patchable_function_entry(...)]` attribute. Indicates how many nops should be around /// the function entry. pub patchable_function_entry: Option, - /// For the `#[autodiff]` macros. - pub autodiff_item: Option, } #[derive(Copy, Clone, Debug, TyEncodable, TyDecodable, HashStable)] @@ -182,7 +179,6 @@ impl CodegenFnAttrs { instruction_set: None, alignment: None, patchable_function_entry: None, - autodiff_item: None, } } diff --git a/compiler/rustc_middle/src/mir/mono.rs b/compiler/rustc_middle/src/mir/mono.rs index e5864660575c5..613f0f7a90cd1 100644 --- a/compiler/rustc_middle/src/mir/mono.rs +++ b/compiler/rustc_middle/src/mir/mono.rs @@ -2,7 +2,6 @@ use std::borrow::Cow; use std::fmt; use std::hash::Hash; -use rustc_ast::expand::autodiff_attrs::AutoDiffItem; use rustc_data_structures::base_n::{BaseNString, CASE_INSENSITIVE, ToBaseN}; use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fx::FxIndexMap; @@ -337,7 +336,6 @@ impl ToStableHashKey> for MonoItem<'_> { pub struct MonoItemPartitions<'tcx> { pub codegen_units: &'tcx [CodegenUnit<'tcx>], pub all_mono_items: &'tcx DefIdSet, - pub autodiff_items: &'tcx [AutoDiffItem], } #[derive(Debug, HashStable)] diff --git a/compiler/rustc_monomorphize/Cargo.toml b/compiler/rustc_monomorphize/Cargo.toml index 0ed5b4fc0d09c..09a55f0b5f8da 100644 --- a/compiler/rustc_monomorphize/Cargo.toml +++ b/compiler/rustc_monomorphize/Cargo.toml @@ -6,7 +6,6 @@ edition = "2024" [dependencies] # tidy-alphabetical-start rustc_abi = { path = "../rustc_abi" } -rustc_ast = { path = "../rustc_ast" } rustc_data_structures = { path = "../rustc_data_structures" } rustc_errors = { path = "../rustc_errors" } rustc_fluent_macro = { path = "../rustc_fluent_macro" } @@ -15,7 +14,6 @@ rustc_macros = { path = "../rustc_macros" } rustc_middle = { path = "../rustc_middle" } rustc_session = { path = "../rustc_session" } rustc_span = { path = "../rustc_span" } -rustc_symbol_mangling = { path = "../rustc_symbol_mangling" } rustc_target = { path = "../rustc_target" } serde = "1" serde_json = "1" diff --git a/compiler/rustc_monomorphize/src/collector.rs b/compiler/rustc_monomorphize/src/collector.rs index 35b80a9b96f4d..92a976258a35d 100644 --- a/compiler/rustc_monomorphize/src/collector.rs +++ b/compiler/rustc_monomorphize/src/collector.rs @@ -205,6 +205,8 @@ //! this is not implemented however: a mono item will be produced //! regardless of whether it is actually needed or not. +mod autodiff; + use std::cell::OnceCell; use std::path::PathBuf; @@ -237,6 +239,7 @@ use rustc_span::source_map::{Spanned, dummy_spanned, respan}; use rustc_span::{DUMMY_SP, Span}; use tracing::{debug, instrument, trace}; +use crate::collector::autodiff::collect_enzyme_autodiff_fn; use crate::errors::{self, EncounteredErrorWhileInstantiating, NoOptimizedMir, RecursionLimit}; #[derive(PartialEq)] @@ -913,6 +916,8 @@ fn visit_instance_use<'tcx>( return; } if let Some(intrinsic) = tcx.intrinsic(instance.def_id()) { + collect_enzyme_autodiff_fn(tcx, instance, intrinsic, output); + if let Some(_requirement) = ValidityRequirement::from_intrinsic(intrinsic.name) { // The intrinsics assert_inhabited, assert_zero_valid, and assert_mem_uninitialized_valid will // be lowered in codegen to nothing or a call to panic_nounwind. So if we encounter any diff --git a/compiler/rustc_monomorphize/src/collector/autodiff.rs b/compiler/rustc_monomorphize/src/collector/autodiff.rs new file mode 100644 index 0000000000000..37b1375ed446d --- /dev/null +++ b/compiler/rustc_monomorphize/src/collector/autodiff.rs @@ -0,0 +1,48 @@ +use rustc_middle::bug; +use rustc_middle::ty::{self, GenericArg, IntrinsicDef, TyCtxt}; + +use crate::collector::{MonoItems, create_fn_mono_item}; + +// Here, we force both primal and diff function to be collected in +// mono so this does not interfere in `enzyme_autodiff` intrinsics +// codegen process. If they are unused, they will be removed later and +// won't be present at LLVM-IR. +pub(crate) fn collect_enzyme_autodiff_fn<'tcx>( + tcx: TyCtxt<'tcx>, + instance: ty::Instance<'tcx>, + intrinsic: IntrinsicDef, + output: &mut MonoItems<'tcx>, +) { + if intrinsic.name != rustc_span::sym::enzyme_autodiff { + return; + }; + + collect_autodiff_fn_from_arg(instance.args[0], tcx, output); +} + +fn collect_autodiff_fn_from_arg<'tcx>( + arg: GenericArg<'tcx>, + tcx: TyCtxt<'tcx>, + output: &mut MonoItems<'tcx>, +) { + let (instance, span) = match arg.kind() { + ty::GenericArgKind::Type(ty) => match ty.kind() { + ty::FnDef(def_id, substs) => { + let span = tcx.def_span(def_id); + let instance = ty::Instance::expect_resolve( + tcx, + ty::TypingEnv::non_body_analysis(tcx, def_id), + *def_id, + substs, + span, + ); + + (instance, span) + } + _ => bug!("expected autodiff function"), + }, + _ => bug!("expected type when matching autodiff arg"), + }; + + output.push(create_fn_mono_item(tcx, instance, span)); +} diff --git a/compiler/rustc_monomorphize/src/partitioning.rs b/compiler/rustc_monomorphize/src/partitioning.rs index d76b27d9970b6..8aa7c3eea619c 100644 --- a/compiler/rustc_monomorphize/src/partitioning.rs +++ b/compiler/rustc_monomorphize/src/partitioning.rs @@ -92,8 +92,6 @@ //! source-level module, functions from the same module will be available for //! inlining, even when they are not marked `#[inline]`. -mod autodiff; - use std::cmp; use std::collections::hash_map::Entry; use std::fs::{self, File}; @@ -251,17 +249,7 @@ where always_export_generics, ); - // We can't differentiate a function that got inlined. - let autodiff_active = cfg!(llvm_enzyme) - && matches!(mono_item, MonoItem::Fn(_)) - && cx - .tcx - .codegen_fn_attrs(mono_item.def_id()) - .autodiff_item - .as_ref() - .is_some_and(|ad| ad.is_active()); - - if !autodiff_active && visibility == Visibility::Hidden && can_be_internalized { + if visibility == Visibility::Hidden && can_be_internalized { internalization_candidates.insert(mono_item); } let size_estimate = mono_item.size_estimate(cx.tcx); @@ -1156,27 +1144,15 @@ fn collect_and_partition_mono_items(tcx: TyCtxt<'_>, (): ()) -> MonoItemPartitio } } - #[cfg(not(llvm_enzyme))] - let autodiff_mono_items: Vec<_> = vec![]; - #[cfg(llvm_enzyme)] - let mut autodiff_mono_items: Vec<_> = vec![]; let mono_items: DefIdSet = items .iter() .filter_map(|mono_item| match *mono_item { - MonoItem::Fn(ref instance) => { - #[cfg(llvm_enzyme)] - autodiff_mono_items.push((mono_item, instance)); - Some(instance.def_id()) - } + MonoItem::Fn(ref instance) => Some(instance.def_id()), MonoItem::Static(def_id) => Some(def_id), _ => None, }) .collect(); - let autodiff_items = - autodiff::find_autodiff_source_functions(tcx, &usage_map, autodiff_mono_items); - let autodiff_items = tcx.arena.alloc_from_iter(autodiff_items); - // Output monomorphization stats per def_id if let SwitchWithOptPath::Enabled(ref path) = tcx.sess.opts.unstable_opts.dump_mono_stats && let Err(err) = @@ -1234,11 +1210,7 @@ fn collect_and_partition_mono_items(tcx: TyCtxt<'_>, (): ()) -> MonoItemPartitio } } - MonoItemPartitions { - all_mono_items: tcx.arena.alloc(mono_items), - codegen_units, - autodiff_items, - } + MonoItemPartitions { all_mono_items: tcx.arena.alloc(mono_items), codegen_units } } /// Outputs stats about instantiation counts and estimated size, per `MonoItem`'s diff --git a/compiler/rustc_monomorphize/src/partitioning/autodiff.rs b/compiler/rustc_monomorphize/src/partitioning/autodiff.rs deleted file mode 100644 index 22d593b80b895..0000000000000 --- a/compiler/rustc_monomorphize/src/partitioning/autodiff.rs +++ /dev/null @@ -1,143 +0,0 @@ -use rustc_ast::expand::autodiff_attrs::{AutoDiffItem, DiffActivity}; -use rustc_hir::def_id::LOCAL_CRATE; -use rustc_middle::bug; -use rustc_middle::mir::mono::MonoItem; -use rustc_middle::ty::{self, Instance, PseudoCanonicalInput, Ty, TyCtxt, TypingEnv}; -use rustc_symbol_mangling::symbol_name_for_instance_in_crate; -use tracing::{debug, trace}; - -use crate::partitioning::UsageMap; - -fn adjust_activity_to_abi<'tcx>(tcx: TyCtxt<'tcx>, fn_ty: Ty<'tcx>, da: &mut Vec) { - if !matches!(fn_ty.kind(), ty::FnDef(..)) { - bug!("expected fn def for autodiff, got {:?}", fn_ty); - } - - // We don't actually pass the types back into the type system. - // All we do is decide how to handle the arguments. - let sig = fn_ty.fn_sig(tcx).skip_binder(); - - let mut new_activities = vec![]; - let mut new_positions = vec![]; - for (i, ty) in sig.inputs().iter().enumerate() { - if let Some(inner_ty) = ty.builtin_deref(true) { - if inner_ty.is_slice() { - // Now we need to figure out the size of each slice element in memory to allow - // safety checks and usability improvements in the backend. - let sty = match inner_ty.builtin_index() { - Some(sty) => sty, - None => { - panic!("slice element type unknown"); - } - }; - let pci = PseudoCanonicalInput { - typing_env: TypingEnv::fully_monomorphized(), - value: sty, - }; - - let layout = tcx.layout_of(pci); - let elem_size = match layout { - Ok(layout) => layout.size, - Err(_) => { - bug!("autodiff failed to compute slice element size"); - } - }; - let elem_size: u32 = elem_size.bytes() as u32; - - // We know that the length will be passed as extra arg. - if !da.is_empty() { - // We are looking at a slice. The length of that slice will become an - // extra integer on llvm level. Integers are always const. - // However, if the slice get's duplicated, we want to know to later check the - // size. So we mark the new size argument as FakeActivitySize. - // There is one FakeActivitySize per slice, so for convenience we store the - // slice element size in bytes in it. We will use the size in the backend. - let activity = match da[i] { - DiffActivity::DualOnly - | DiffActivity::Dual - | DiffActivity::Dualv - | DiffActivity::DuplicatedOnly - | DiffActivity::Duplicated => { - DiffActivity::FakeActivitySize(Some(elem_size)) - } - DiffActivity::Const => DiffActivity::Const, - _ => bug!("unexpected activity for ptr/ref"), - }; - new_activities.push(activity); - new_positions.push(i + 1); - } - - continue; - } - } - } - // now add the extra activities coming from slices - // Reverse order to not invalidate the indices - for _ in 0..new_activities.len() { - let pos = new_positions.pop().unwrap(); - let activity = new_activities.pop().unwrap(); - da.insert(pos, activity); - } -} - -pub(crate) fn find_autodiff_source_functions<'tcx>( - tcx: TyCtxt<'tcx>, - usage_map: &UsageMap<'tcx>, - autodiff_mono_items: Vec<(&MonoItem<'tcx>, &Instance<'tcx>)>, -) -> Vec { - let mut autodiff_items: Vec = vec![]; - for (item, instance) in autodiff_mono_items { - let target_id = instance.def_id(); - let cg_fn_attr = &tcx.codegen_fn_attrs(target_id).autodiff_item; - let Some(target_attrs) = cg_fn_attr else { - continue; - }; - let mut input_activities: Vec = target_attrs.input_activity.clone(); - if target_attrs.is_source() { - trace!("source found: {:?}", target_id); - } - if !target_attrs.apply_autodiff() { - continue; - } - - let target_symbol = symbol_name_for_instance_in_crate(tcx, instance.clone(), LOCAL_CRATE); - - let source = - usage_map.used_map.get(&item).unwrap().into_iter().find_map(|item| match *item { - MonoItem::Fn(ref instance_s) => { - let source_id = instance_s.def_id(); - if let Some(ad) = &tcx.codegen_fn_attrs(source_id).autodiff_item - && ad.is_active() - { - return Some(instance_s); - } - None - } - _ => None, - }); - let inst = match source { - Some(source) => source, - None => continue, - }; - - debug!("source_id: {:?}", inst.def_id()); - let fn_ty = inst.ty(tcx, ty::TypingEnv::fully_monomorphized()); - assert!(fn_ty.is_fn()); - adjust_activity_to_abi(tcx, fn_ty, &mut input_activities); - let symb = symbol_name_for_instance_in_crate(tcx, inst.clone(), LOCAL_CRATE); - - let mut new_target_attrs = target_attrs.clone(); - new_target_attrs.input_activity = input_activities; - let itm = new_target_attrs.into_item(symb, target_symbol); - autodiff_items.push(itm); - } - - if !autodiff_items.is_empty() { - trace!("AUTODIFF ITEMS EXIST"); - for item in &mut *autodiff_items { - trace!("{}", &item); - } - } - - autodiff_items -} diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index d54175548e30e..05c72f5e81e3b 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -915,6 +915,7 @@ symbols! { enumerate_method, env, env_CFG_RELEASE: env!("CFG_RELEASE"), + enzyme_autodiff, eprint_macro, eprintln_macro, eq, diff --git a/library/core/src/intrinsics/mod.rs b/library/core/src/intrinsics/mod.rs index 106cc725fee2c..eb4dd74c697c5 100644 --- a/library/core/src/intrinsics/mod.rs +++ b/library/core/src/intrinsics/mod.rs @@ -3163,6 +3163,44 @@ pub const unsafe fn copysignf64(x: f64, y: f64) -> f64; #[rustc_intrinsic] pub const unsafe fn copysignf128(x: f128, y: f128) -> f128; +/// Generates the LLVM body for the automatic differentiation of `f` using Enzyme, +/// with `df` as the derivative function and `args` as its arguments. +/// +/// Used internally as the body of `df` when expanding the `#[autodiff_forward]` +/// and `#[autodiff_reverse]` attribute macros. +/// +/// Type Parameters: +/// - `F`: The original function to differentiate. Must be a function item. +/// - `G`: The derivative function. Must be a function item. +/// - `T`: A tuple of arguments passed to `df`. +/// - `R`: The return type of the derivative function. +/// +/// This shows where the `enzyme_autodiff` intrinsic is used during macro expansion: +/// +/// ```rust,ignore (macro example) +/// #[autodiff_forward(df1, Dual, Const, Dual)] +/// pub fn f1(x: &[f64], y: f64) -> f64 { +/// unimplemented!() +/// } +/// ``` +/// +/// expands to: +/// +/// ```rust,ignore (macro example) +/// #[rustc_autodiff] +/// #[inline(never)] +/// pub fn f1(x: &[f64], y: f64) -> f64 { +/// ::core::panicking::panic("not implemented") +/// } +/// #[rustc_autodiff(Forward, 1, Dual, Const, Dual)] +/// pub fn df1(x: &[f64], bx_0: &[f64], y: f64) -> (f64, f64) { +/// ::core::intrinsics::enzyme_autodiff(f1::<>, df1::<>, (x, bx_0, y)) +/// } +/// ``` +#[rustc_nounwind] +#[rustc_intrinsic] +pub const fn enzyme_autodiff(f: F, df: G, args: T) -> R; + /// Inform Miri that a given pointer definitely has a certain alignment. #[cfg(miri)] #[rustc_allow_const_fn_unstable(const_eval_select)] diff --git a/library/core/src/macros/mod.rs b/library/core/src/macros/mod.rs index 3d57da63683b2..061427285631d 100644 --- a/library/core/src/macros/mod.rs +++ b/library/core/src/macros/mod.rs @@ -1491,6 +1491,7 @@ pub(crate) mod builtin { /// (or explicitly returns `-> ()`). Otherwise, it must be set to one of the allowed activities. #[unstable(feature = "autodiff", issue = "124509")] #[allow_internal_unstable(rustc_attrs)] + #[allow_internal_unstable(core_intrinsics)] #[rustc_builtin_macro] pub macro autodiff_forward($item:item) { /* compiler built-in */ @@ -1509,6 +1510,7 @@ pub(crate) mod builtin { /// (or explicitly returns `-> ()`). Otherwise, it must be set to one of the allowed activities. #[unstable(feature = "autodiff", issue = "124509")] #[allow_internal_unstable(rustc_attrs)] + #[allow_internal_unstable(core_intrinsics)] #[rustc_builtin_macro] pub macro autodiff_reverse($item:item) { /* compiler built-in */ diff --git a/src/doc/rustc-dev-guide/src/SUMMARY.md b/src/doc/rustc-dev-guide/src/SUMMARY.md index e3c0d50fcc737..8518f0033d068 100644 --- a/src/doc/rustc-dev-guide/src/SUMMARY.md +++ b/src/doc/rustc-dev-guide/src/SUMMARY.md @@ -107,7 +107,6 @@ - [Installation](./autodiff/installation.md) - [How to debug](./autodiff/debugging.md) - [Autodiff flags](./autodiff/flags.md) - - [Current limitations](./autodiff/limitations.md) # Source Code Representation diff --git a/src/doc/rustc-dev-guide/src/autodiff/limitations.md b/src/doc/rustc-dev-guide/src/autodiff/limitations.md deleted file mode 100644 index 90afbd51f3fd9..0000000000000 --- a/src/doc/rustc-dev-guide/src/autodiff/limitations.md +++ /dev/null @@ -1,27 +0,0 @@ -# Current limitations - -## Safety and Soundness - -Enzyme currently assumes that the user passes shadow arguments (`dx`, `dy`, ...) of appropriate size. Under Reverse Mode, we additionally assume that shadow arguments are mutable. In Reverse Mode we adjust the outermost pointer or reference to be mutable. Therefore `&f32` will receive the shadow type `&mut f32`. However, we do not check length for other types than slices (e.g. enums, Vec). We also do not enforce mutability of inner references, but will warn if we recognize them. We do intend to add additional checks over time. - -## ABI adjustments - -In some cases, a function parameter might get lowered in a way that we currently don't handle correctly, leading to a compile time type mismatch in the `rustc_codegen_llvm` backend. Here are some [examples](https://github.com/EnzymeAD/rust/issues/105). - -## Compile Times - -Enzyme will often achieve excellent runtime performance, but might increase your compile time by a large factor. For Rust, we already have made significant improvements and have a list of further improvements planed - please reach out if you have time to help here. - -### Type Analysis - -Most of the times, Type Analysis (TA) is the reason of large (>5x) compile time increases when using Enzyme. This poster explains why we need to run Type Analysis in the bottom left part: [Poster Link](https://c.wsmoses.com/posters/Enzyme-llvmdev.pdf). - -We intend to increase the number of locations where we pass down Type information based on Rust types, which in turn will reduce the number of locations where Enzyme has to run Type Analysis, which will help compile times. - -### Duplicated Optimizations - -The key reason for Enzyme offering often excellent performance is that Enzyme differentiates already optimized LLVM-IR. However, we also (have to) run LLVM's optimization pipeline after differentiating, to make sure that the code which Enzyme generates is optimized properly. As a result you should have excellent runtime performance (please fill an issue if not), but at a compile time cost for running optimizations twice. - -### Fat-LTO - -The usage of `#[autodiff(...)]` currently requires compiling your project with Fat-LTO. We technically only need LTO if the function being differentiated calls functions in other compilation units. Therefore, other solutions are possible, but this is the most simple one to get started. diff --git a/tests/codegen-llvm/autodiffv2.rs b/tests/codegen-llvm/autodiff/autodiffv2.rs similarity index 96% rename from tests/codegen-llvm/autodiffv2.rs rename to tests/codegen-llvm/autodiff/autodiffv2.rs index a40d19d3be3a8..85aed6a183b63 100644 --- a/tests/codegen-llvm/autodiffv2.rs +++ b/tests/codegen-llvm/autodiff/autodiffv2.rs @@ -26,12 +26,13 @@ #![feature(autodiff)] -use std::autodiff::autodiff; +use std::autodiff::autodiff_forward; +// CHECK: ; #[no_mangle] //#[autodiff(d_square1, Forward, Dual, Dual)] -#[autodiff(d_square2, Forward, 4, Dualv, Dualv)] -#[autodiff(d_square3, Forward, 4, Dual, Dual)] +#[autodiff_forward(d_square2, 4, Dualv, Dualv)] +#[autodiff_forward(d_square3, 4, Dual, Dual)] fn square(x: &[f32], y: &mut [f32]) { assert!(x.len() >= 4); assert!(y.len() >= 5); diff --git a/tests/codegen-llvm/autodiff/batched.rs b/tests/codegen-llvm/autodiff/batched.rs index d27aed50e6cc4..2104d2028493a 100644 --- a/tests/codegen-llvm/autodiff/batched.rs +++ b/tests/codegen-llvm/autodiff/batched.rs @@ -21,82 +21,39 @@ fn square(x: &f32) -> f32 { x * x } -// d_sqaure2 -// CHECK: define internal fastcc [4 x float] @fwddiffe4square(float %x.0.val, [4 x ptr] %"x'") -// CHECK-NEXT: start: -// CHECK-NEXT: %0 = extractvalue [4 x ptr] %"x'", 0 -// CHECK-NEXT: %"_2'ipl" = load float, ptr %0, align 4 -// CHECK-NEXT: %1 = extractvalue [4 x ptr] %"x'", 1 -// CHECK-NEXT: %"_2'ipl1" = load float, ptr %1, align 4 -// CHECK-NEXT: %2 = extractvalue [4 x ptr] %"x'", 2 -// CHECK-NEXT: %"_2'ipl2" = load float, ptr %2, align 4 -// CHECK-NEXT: %3 = extractvalue [4 x ptr] %"x'", 3 -// CHECK-NEXT: %"_2'ipl3" = load float, ptr %3, align 4 -// CHECK-NEXT: %4 = insertelement <4 x float> poison, float %"_2'ipl", i64 0 -// CHECK-NEXT: %5 = insertelement <4 x float> %4, float %"_2'ipl1", i64 1 -// CHECK-NEXT: %6 = insertelement <4 x float> %5, float %"_2'ipl2", i64 2 -// CHECK-NEXT: %7 = insertelement <4 x float> %6, float %"_2'ipl3", i64 3 -// CHECK-NEXT: %8 = fadd fast <4 x float> %7, %7 -// CHECK-NEXT: %9 = insertelement <4 x float> poison, float %x.0.val, i64 0 -// CHECK-NEXT: %10 = shufflevector <4 x float> %9, <4 x float> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: %11 = fmul fast <4 x float> %8, %10 -// CHECK-NEXT: %12 = extractelement <4 x float> %11, i64 0 -// CHECK-NEXT: %13 = insertvalue [4 x float] undef, float %12, 0 -// CHECK-NEXT: %14 = extractelement <4 x float> %11, i64 1 -// CHECK-NEXT: %15 = insertvalue [4 x float] %13, float %14, 1 -// CHECK-NEXT: %16 = extractelement <4 x float> %11, i64 2 -// CHECK-NEXT: %17 = insertvalue [4 x float] %15, float %16, 2 -// CHECK-NEXT: %18 = extractelement <4 x float> %11, i64 3 -// CHECK-NEXT: %19 = insertvalue [4 x float] %17, float %18, 3 -// CHECK-NEXT: ret [4 x float] %19 -// CHECK-NEXT: } - -// d_square3, the extra float is the original return value (x * x) -// CHECK: define internal fastcc { float, [4 x float] } @fwddiffe4square.1(float %x.0.val, [4 x ptr] %"x'") -// CHECK-NEXT: start: -// CHECK-NEXT: %0 = extractvalue [4 x ptr] %"x'", 0 -// CHECK-NEXT: %"_2'ipl" = load float, ptr %0, align 4 -// CHECK-NEXT: %1 = extractvalue [4 x ptr] %"x'", 1 -// CHECK-NEXT: %"_2'ipl1" = load float, ptr %1, align 4 -// CHECK-NEXT: %2 = extractvalue [4 x ptr] %"x'", 2 -// CHECK-NEXT: %"_2'ipl2" = load float, ptr %2, align 4 -// CHECK-NEXT: %3 = extractvalue [4 x ptr] %"x'", 3 -// CHECK-NEXT: %"_2'ipl3" = load float, ptr %3, align 4 -// CHECK-NEXT: %_0 = fmul float %x.0.val, %x.0.val -// CHECK-NEXT: %4 = insertelement <4 x float> poison, float %"_2'ipl", i64 0 -// CHECK-NEXT: %5 = insertelement <4 x float> %4, float %"_2'ipl1", i64 1 -// CHECK-NEXT: %6 = insertelement <4 x float> %5, float %"_2'ipl2", i64 2 -// CHECK-NEXT: %7 = insertelement <4 x float> %6, float %"_2'ipl3", i64 3 -// CHECK-NEXT: %8 = fadd fast <4 x float> %7, %7 -// CHECK-NEXT: %9 = insertelement <4 x float> poison, float %x.0.val, i64 0 -// CHECK-NEXT: %10 = shufflevector <4 x float> %9, <4 x float> poison, <4 x i32> zeroinitializer -// CHECK-NEXT: %11 = fmul fast <4 x float> %8, %10 -// CHECK-NEXT: %12 = extractelement <4 x float> %11, i64 0 -// CHECK-NEXT: %13 = insertvalue [4 x float] undef, float %12, 0 -// CHECK-NEXT: %14 = extractelement <4 x float> %11, i64 1 -// CHECK-NEXT: %15 = insertvalue [4 x float] %13, float %14, 1 -// CHECK-NEXT: %16 = extractelement <4 x float> %11, i64 2 -// CHECK-NEXT: %17 = insertvalue [4 x float] %15, float %16, 2 -// CHECK-NEXT: %18 = extractelement <4 x float> %11, i64 3 -// CHECK-NEXT: %19 = insertvalue [4 x float] %17, float %18, 3 -// CHECK-NEXT: %20 = insertvalue { float, [4 x float] } undef, float %_0, 0 -// CHECK-NEXT: %21 = insertvalue { float, [4 x float] } %20, [4 x float] %19, 1 -// CHECK-NEXT: ret { float, [4 x float] } %21 -// CHECK-NEXT: } - fn main() { let x = std::hint::black_box(3.0); + + // square(&x) + // CHECK: %_0.i = fmul float %_2.i, %_2.i + // CHECK-NEXT: store float %_0.i, ptr %output, align 4 let output = square(&x); dbg!(&output); assert_eq!(9.0, output); + + // square(&x) + // CHECK: %_2.i26 = load float, ptr %x, align 4 + // CHECK-NEXT: %_0.i27 = fmul float %_2.i26, %_2.i26 dbg!(square(&x)); let mut df_dx1 = 1.0; let mut df_dx2 = 2.0; let mut df_dx3 = 3.0; let mut df_dx4 = 0.0; + + // [o1, o2, o3, o4] (o4 is being optimized away as its smth * 0.0) + // CHECK: %x.val = load float, ptr %x, align 4 + // CHECK-NEXT: %13 = fmul fast float %x.val, 2.000000e+00 + // CHECK-NEXT: %14 = fmul fast float %x.val, 4.000000e+00 + // CHECK-NEXT: %15 = fmul fast float %x.val, 6.000000e+00 let [o1, o2, o3, o4] = d_square2(&x, &mut df_dx1, &mut df_dx2, &mut df_dx3, &mut df_dx4); dbg!(o1, o2, o3, o4); + + // [output2, o1, o2, o3, o4] (o4 is being optimized away as its smth * 0.0) + // CHECK: %_0.i45 = fmul float %x.val35, %x.val35 + // CHECK-NEXT: %40 = fmul fast float %x.val35, 2.000000e+00 + // CHECK-NEXT: %41 = fmul fast float %x.val35, 4.000000e+00 + // CHECK-NEXT: %42 = fmul fast float %x.val35, 6.000000e+00 let [output2, o1, o2, o3, o4] = d_square1(&x, &mut df_dx1, &mut df_dx2, &mut df_dx3, &mut df_dx4); dbg!(o1, o2, o3, o4); @@ -109,8 +66,22 @@ fn main() { assert_eq!(2.0, df_dx2); assert_eq!(3.0, df_dx3); assert_eq!(0.0, df_dx4); + + // d_square3(&x, &mut df_dx1) + // CHECK: %x.val39 = load float, ptr %x, align 4 + // CHECK-NEXT: %72 = fmul fast float %x.val39, 2.000000e+00 assert_eq!(d_square3(&x, &mut df_dx1), 2.0 * o1); + + // d_square3(&x, &mut df_dx2) + // CHECK: %74 = fmul fast float %x.val39, 4.000000e+00 + // CHECK-NEXT: store float %74, ptr %_191, align 4 assert_eq!(d_square3(&x, &mut df_dx2), 2.0 * o2); + + // d_square3(&x, &mut df_dx3) + // CHECK: %76 = fmul fast float %x.val39, 6.000000e+00 + // CHECK-NEXT: store float %76, ptr %_200, align 4 assert_eq!(d_square3(&x, &mut df_dx3), 2.0 * o3); + + // d_square3(&x, &mut df_dx3) is being optimized away as it's smth * 0.0 assert_eq!(d_square3(&x, &mut df_dx4), 2.0 * o4); } diff --git a/tests/codegen-llvm/autodiff/generic.rs b/tests/codegen-llvm/autodiff/generic.rs index 2f674079be021..603ca849e689e 100644 --- a/tests/codegen-llvm/autodiff/generic.rs +++ b/tests/codegen-llvm/autodiff/generic.rs @@ -10,23 +10,13 @@ fn square + Copy>(x: &T) -> T { *x * *x } -// Ensure that `d_square::` code is generated even if `square::` was never called +// Ensure that `square::` code is generated // -// CHECK: ; generic::square -// CHECK-NEXT: ; Function Attrs: -// CHECK-NEXT: define internal {{.*}} double -// CHECK-NEXT: start: -// CHECK-NOT: ret -// CHECK: fmul double - -// Ensure that `d_square::` code is generated +// CHECK: %1 = fmul float %xf32, %xf32 + +// Ensure that `d_square::` code is generated even if `square::` was never called // -// CHECK: ; generic::square -// CHECK-NEXT: ; Function Attrs: {{.*}} -// CHECK-NEXT: define internal {{.*}} float -// CHECK-NEXT: start: -// CHECK-NOT: ret -// CHECK: fmul float +// CHECK: define internal { double } @diffe_ZN7generic6square17he5c855620985cd59E fn main() { let xf32: f32 = std::hint::black_box(3.0); diff --git a/tests/codegen-llvm/autodiff/identical_fnc.rs b/tests/codegen-llvm/autodiff/identical_fnc.rs index 1c25b3d09ab0d..e3b5db291cfbb 100644 --- a/tests/codegen-llvm/autodiff/identical_fnc.rs +++ b/tests/codegen-llvm/autodiff/identical_fnc.rs @@ -23,17 +23,13 @@ fn square2(x: &f64) -> f64 { x * x } -// CHECK:; identical_fnc::main -// CHECK-NEXT:; Function Attrs: -// CHECK-NEXT:define internal void @_ZN13identical_fnc4main17hf4dbc69c8d2f9130E() -// CHECK-NEXT:start: -// CHECK-NOT:br -// CHECK-NOT:ret -// CHECK:; call identical_fnc::d_square -// CHECK-NEXT: call fastcc void @_ZN13identical_fnc8d_square17h4c364207a2f8e06dE(double %x.val, ptr noalias noundef nonnull align 8 dereferenceable(8) %dx1) -// CHECK-NEXT:; call identical_fnc::d_square -// CHECK-NEXT: call fastcc void @_ZN13identical_fnc8d_square17h4c364207a2f8e06dE(double %x.val, ptr noalias noundef nonnull align 8 dereferenceable(8) %dx2) - +// CHECK: %0 = fadd fast double %x.val, %x.val +// CHECK-NEXT: %1 = load double, ptr %dx1, align 8 +// CHECK-NEXT: %2 = fadd fast double %1, %0 +// CHECK-NEXT: store double %2, ptr %dx1, align 8 +// CHECK-NEXT: %3 = load double, ptr %dx2, align 8 +// CHECK-NEXT: %4 = fadd fast double %3, %0 +// CHECK-NEXT: store double %4, ptr %dx2, align 8 fn main() { let x = std::hint::black_box(3.0); let mut dx1 = std::hint::black_box(1.0); diff --git a/tests/codegen-llvm/autodiff/inline.rs b/tests/codegen-llvm/autodiff/inline.rs deleted file mode 100644 index 65bed170207cc..0000000000000 --- a/tests/codegen-llvm/autodiff/inline.rs +++ /dev/null @@ -1,23 +0,0 @@ -//@ compile-flags: -Zautodiff=Enable -C opt-level=3 -Clto=fat -Zautodiff=NoPostopt -//@ no-prefer-dynamic -//@ needs-enzyme - -#![feature(autodiff)] - -use std::autodiff::autodiff_reverse; - -#[autodiff_reverse(d_square, Duplicated, Active)] -fn square(x: &f64) -> f64 { - x * x -} - -// CHECK: ; inline::d_square -// CHECK-NEXT: ; Function Attrs: alwaysinline -// CHECK-NOT: noinline -// CHECK-NEXT: define internal fastcc void @_ZN6inline8d_square17h021c74e92c259cdeE -fn main() { - let x = std::hint::black_box(3.0); - let mut dx1 = std::hint::black_box(1.0); - let _ = d_square(&x, &mut dx1, 1.0); - assert_eq!(dx1, 6.0); -} diff --git a/tests/codegen-llvm/autodiff/scalar.rs b/tests/codegen-llvm/autodiff/scalar.rs index 096b4209e84ad..ca6bf76aeb12b 100644 --- a/tests/codegen-llvm/autodiff/scalar.rs +++ b/tests/codegen-llvm/autodiff/scalar.rs @@ -11,16 +11,11 @@ fn square(x: &f64) -> f64 { x * x } -// CHECK:define internal fastcc double @diffesquare(double %x.0.val, ptr nocapture nonnull align 8 %"x'" -// CHECK-NEXT:invertstart: -// CHECK-NEXT: %_0 = fmul double %x.0.val, %x.0.val -// CHECK-NEXT: %0 = fadd fast double %x.0.val, %x.0.val -// CHECK-NEXT: %1 = load double, ptr %"x'", align 8 -// CHECK-NEXT: %2 = fadd fast double %1, %0 -// CHECK-NEXT: store double %2, ptr %"x'", align 8 -// CHECK-NEXT: ret double %_0 -// CHECK-NEXT:} +// square +// CHECK: %_0.i = fmul double %_2.i, %_2.i +// d_square +// CHECK: %0 = fadd fast double %_2.i, %_2.i fn main() { let x = std::hint::black_box(3.0); let output = square(&x); diff --git a/tests/codegen-llvm/autodiff/sret.rs b/tests/codegen-llvm/autodiff/sret.rs index d2fa85e3e3787..658055a561a1a 100644 --- a/tests/codegen-llvm/autodiff/sret.rs +++ b/tests/codegen-llvm/autodiff/sret.rs @@ -17,26 +17,20 @@ fn primal(x: f32, y: f32) -> f64 { (x * x * y) as f64 } -// CHECK:define internal fastcc void @_ZN4sret2df17h93be4316dd8ea006E(ptr dead_on_unwind noalias nocapture noundef nonnull writable writeonly align 8 dereferenceable(16) initializes((0, 16)) %_0, float noundef %x, float noundef %y) -// CHECK-NEXT:start: -// CHECK-NEXT: %0 = tail call fastcc { double, float, float } @diffeprimal(float %x, float %y) -// CHECK-NEXT: %.elt = extractvalue { double, float, float } %0, 0 -// CHECK-NEXT: store double %.elt, ptr %_0, align 8 -// CHECK-NEXT: %_0.repack1 = getelementptr inbounds nuw i8, ptr %_0, i64 8 -// CHECK-NEXT: %.elt2 = extractvalue { double, float, float } %0, 1 -// CHECK-NEXT: store float %.elt2, ptr %_0.repack1, align 8 -// CHECK-NEXT: %_0.repack3 = getelementptr inbounds nuw i8, ptr %_0, i64 12 -// CHECK-NEXT: %.elt4 = extractvalue { double, float, float } %0, 2 -// CHECK-NEXT: store float %.elt4, ptr %_0.repack3, align 4 -// CHECK-NEXT: ret void -// CHECK-NEXT:} - +// CHECK: %_4.i = fmul float %x, %x +// CHECK-NEXT: %_3.i = fmul float %_4.i, %y +// CHECK-NEXT: %_0.i = fpext float %_3.i to double +// CHECK-NEXT: %3 = fadd fast float %y, %y +// CHECK-NEXT: %4 = fmul fast float %3, %x +// CHECK-NEXT: store double %_0.i, ptr %r1, align 8 +// CHECK-NEXT: store float %4, ptr %r2, align 4 +// CHECK-NEXT: store float %_4.i, ptr %r3, align 4 fn main() { let x = std::hint::black_box(3.0); let y = std::hint::black_box(2.5); let scalar = std::hint::black_box(1.0); let (r1, r2, r3) = df(x, y, scalar); - // 3*3*1.5 = 22.5 + // 3*3*2.5 = 22.5 assert_eq!(r1, 22.5); // 2*x*y = 2*3*2.5 = 15.0 assert_eq!(r2, 15.0); diff --git a/tests/codegen-llvm/autodiff/trait.rs b/tests/codegen-llvm/autodiff/trait.rs new file mode 100644 index 0000000000000..701f3a9e843bd --- /dev/null +++ b/tests/codegen-llvm/autodiff/trait.rs @@ -0,0 +1,30 @@ +//@ compile-flags: -Zautodiff=Enable -Zautodiff=NoPostopt -C opt-level=3 -Clto=fat +//@ no-prefer-dynamic +//@ needs-enzyme + +// Just check it does not crash for now +// CHECK: ; +#![feature(autodiff)] + +use std::autodiff::autodiff_reverse; + +struct Foo { + a: f64, +} + +trait MyTrait { + fn f(&self, x: f64) -> f64; + fn df(&self, x: f64, seed: f64) -> (f64, f64); +} + +impl MyTrait for Foo { + #[autodiff_reverse(df, Const, Active, Active)] + fn f(&self, x: f64) -> f64 { + self.a * 0.25 * (x * x - 1.0 - 2.0 * x.ln()) + } +} + +fn main() { + let foo = Foo { a: 3.0f64 }; + dbg!(foo.df(1.0, 1.0)); +} diff --git a/tests/pretty/autodiff/autodiff_forward.pp b/tests/pretty/autodiff/autodiff_forward.pp index a2525abc83207..e337f4ac21202 100644 --- a/tests/pretty/autodiff/autodiff_forward.pp +++ b/tests/pretty/autodiff/autodiff_forward.pp @@ -16,7 +16,6 @@ use std::autodiff::{autodiff_forward, autodiff_reverse}; #[rustc_autodiff] -#[inline(never)] pub fn f1(x: &[f64], y: f64) -> f64 { @@ -36,163 +35,102 @@ ::core::panicking::panic("not implemented") } #[rustc_autodiff(Forward, 1, Dual, Const, Dual)] -#[inline(never)] pub fn df1(x: &[f64], bx_0: &[f64], y: f64) -> (f64, f64) { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f1(x, y)); - ::core::hint::black_box((bx_0,)); - ::core::hint::black_box(<(f64, f64)>::default()) + ::core::intrinsics::enzyme_autodiff(f1::<>, df1::<>, (x, bx_0, y)) } #[rustc_autodiff] -#[inline(never)] pub fn f2(x: &[f64], y: f64) -> f64 { ::core::panicking::panic("not implemented") } #[rustc_autodiff(Forward, 1, Dual, Const, Const)] -#[inline(never)] pub fn df2(x: &[f64], bx_0: &[f64], y: f64) -> f64 { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f2(x, y)); - ::core::hint::black_box((bx_0,)); - ::core::hint::black_box(f2(x, y)) + ::core::intrinsics::enzyme_autodiff(f2::<>, df2::<>, (x, bx_0, y)) } #[rustc_autodiff] -#[inline(never)] pub fn f3(x: &[f64], y: f64) -> f64 { ::core::panicking::panic("not implemented") } #[rustc_autodiff(Forward, 1, Dual, Const, Const)] -#[inline(never)] pub fn df3(x: &[f64], bx_0: &[f64], y: f64) -> f64 { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f3(x, y)); - ::core::hint::black_box((bx_0,)); - ::core::hint::black_box(f3(x, y)) + ::core::intrinsics::enzyme_autodiff(f3::<>, df3::<>, (x, bx_0, y)) } #[rustc_autodiff] -#[inline(never)] pub fn f4() {} #[rustc_autodiff(Forward, 1, None)] -#[inline(never)] pub fn df4() -> () { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f4()); - ::core::hint::black_box(()); + ::core::intrinsics::enzyme_autodiff(f4::<>, df4::<>, ()) } #[rustc_autodiff] -#[inline(never)] pub fn f5(x: &[f64], y: f64) -> f64 { ::core::panicking::panic("not implemented") } #[rustc_autodiff(Forward, 1, Const, Dual, Const)] -#[inline(never)] pub fn df5_y(x: &[f64], y: f64, by_0: f64) -> f64 { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f5(x, y)); - ::core::hint::black_box((by_0,)); - ::core::hint::black_box(f5(x, y)) + ::core::intrinsics::enzyme_autodiff(f5::<>, df5_y::<>, (x, y, by_0)) } #[rustc_autodiff(Forward, 1, Dual, Const, Const)] -#[inline(never)] pub fn df5_x(x: &[f64], bx_0: &[f64], y: f64) -> f64 { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f5(x, y)); - ::core::hint::black_box((bx_0,)); - ::core::hint::black_box(f5(x, y)) + ::core::intrinsics::enzyme_autodiff(f5::<>, df5_x::<>, (x, bx_0, y)) } #[rustc_autodiff(Reverse, 1, Duplicated, Const, Active)] -#[inline(never)] pub fn df5_rev(x: &[f64], dx_0: &mut [f64], y: f64, dret: f64) -> f64 { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f5(x, y)); - ::core::hint::black_box((dx_0, dret)); - ::core::hint::black_box(f5(x, y)) + ::core::intrinsics::enzyme_autodiff(f5::<>, df5_rev::<>, + (x, dx_0, y, dret)) } struct DoesNotImplDefault; #[rustc_autodiff] -#[inline(never)] pub fn f6() -> DoesNotImplDefault { ::core::panicking::panic("not implemented") } #[rustc_autodiff(Forward, 1, Const)] -#[inline(never)] pub fn df6() -> DoesNotImplDefault { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f6()); - ::core::hint::black_box(()); - ::core::hint::black_box(f6()) + ::core::intrinsics::enzyme_autodiff(f6::<>, df6::<>, ()) } #[rustc_autodiff] -#[inline(never)] pub fn f7(x: f32) -> () {} #[rustc_autodiff(Forward, 1, Const, None)] -#[inline(never)] pub fn df7(x: f32) -> () { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f7(x)); - ::core::hint::black_box(()); + ::core::intrinsics::enzyme_autodiff(f7::<>, df7::<>, (x,)) } #[no_mangle] #[rustc_autodiff] -#[inline(never)] fn f8(x: &f32) -> f32 { ::core::panicking::panic("not implemented") } #[rustc_autodiff(Forward, 4, Dual, Dual)] -#[inline(never)] fn f8_3(x: &f32, bx_0: &f32, bx_1: &f32, bx_2: &f32, bx_3: &f32) -> [f32; 5usize] { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f8(x)); - ::core::hint::black_box((bx_0, bx_1, bx_2, bx_3)); - ::core::hint::black_box(<[f32; 5usize]>::default()) + ::core::intrinsics::enzyme_autodiff(f8::<>, f8_3::<>, + (x, bx_0, bx_1, bx_2, bx_3)) } #[rustc_autodiff(Forward, 4, Dual, DualOnly)] -#[inline(never)] fn f8_2(x: &f32, bx_0: &f32, bx_1: &f32, bx_2: &f32, bx_3: &f32) -> [f32; 4usize] { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f8(x)); - ::core::hint::black_box((bx_0, bx_1, bx_2, bx_3)); - ::core::hint::black_box(<[f32; 4usize]>::default()) + ::core::intrinsics::enzyme_autodiff(f8::<>, f8_2::<>, + (x, bx_0, bx_1, bx_2, bx_3)) } #[rustc_autodiff(Forward, 1, Dual, DualOnly)] -#[inline(never)] fn f8_1(x: &f32, bx_0: &f32) -> f32 { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f8(x)); - ::core::hint::black_box((bx_0,)); - ::core::hint::black_box(::default()) + ::core::intrinsics::enzyme_autodiff(f8::<>, f8_1::<>, (x, bx_0)) } pub fn f9() { #[rustc_autodiff] - #[inline(never)] fn inner(x: f32) -> f32 { x * x } #[rustc_autodiff(Forward, 1, Dual, Dual)] - #[inline(never)] fn d_inner_2(x: f32, bx_0: f32) -> (f32, f32) { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(inner(x)); - ::core::hint::black_box((bx_0,)); - ::core::hint::black_box(<(f32, f32)>::default()) + ::core::intrinsics::enzyme_autodiff(inner::<>, d_inner_2::<>, + (x, bx_0)) } #[rustc_autodiff(Forward, 1, Dual, DualOnly)] - #[inline(never)] fn d_inner_1(x: f32, bx_0: f32) -> f32 { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(inner(x)); - ::core::hint::black_box((bx_0,)); - ::core::hint::black_box(::default()) + ::core::intrinsics::enzyme_autodiff(inner::<>, d_inner_1::<>, + (x, bx_0)) } } #[rustc_autodiff] -#[inline(never)] pub fn f10 + Copy>(x: &T) -> T { *x * *x } #[rustc_autodiff(Reverse, 1, Duplicated, Active)] -#[inline(never)] pub fn d_square + Copy>(x: &T, dx_0: &mut T, dret: T) -> T { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f10::(x)); - ::core::hint::black_box((dx_0, dret)); - ::core::hint::black_box(f10::(x)) + ::core::intrinsics::enzyme_autodiff(f10::, d_square::, + (x, dx_0, dret)) } fn main() {} diff --git a/tests/pretty/autodiff/autodiff_reverse.pp b/tests/pretty/autodiff/autodiff_reverse.pp index e67c3443ddef1..9fd9be7b1a87a 100644 --- a/tests/pretty/autodiff/autodiff_reverse.pp +++ b/tests/pretty/autodiff/autodiff_reverse.pp @@ -16,7 +16,6 @@ use std::autodiff::autodiff_reverse; #[rustc_autodiff] -#[inline(never)] pub fn f1(x: &[f64], y: f64) -> f64 { // Not the most interesting derivative, but who are we to judge @@ -29,58 +28,35 @@ ::core::panicking::panic("not implemented") } #[rustc_autodiff(Reverse, 1, Duplicated, Const, Active)] -#[inline(never)] pub fn df1(x: &[f64], dx_0: &mut [f64], y: f64, dret: f64) -> f64 { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f1(x, y)); - ::core::hint::black_box((dx_0, dret)); - ::core::hint::black_box(f1(x, y)) + ::core::intrinsics::enzyme_autodiff(f1::<>, df1::<>, (x, dx_0, y, dret)) } #[rustc_autodiff] -#[inline(never)] pub fn f2() {} #[rustc_autodiff(Reverse, 1, None)] -#[inline(never)] -pub fn df2() { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f2()); - ::core::hint::black_box(()); -} +pub fn df2() { ::core::intrinsics::enzyme_autodiff(f2::<>, df2::<>, ()) } #[rustc_autodiff] -#[inline(never)] pub fn f3(x: &[f64], y: f64) -> f64 { ::core::panicking::panic("not implemented") } #[rustc_autodiff(Reverse, 1, Duplicated, Const, Active)] -#[inline(never)] pub fn df3(x: &[f64], dx_0: &mut [f64], y: f64, dret: f64) -> f64 { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f3(x, y)); - ::core::hint::black_box((dx_0, dret)); - ::core::hint::black_box(f3(x, y)) + ::core::intrinsics::enzyme_autodiff(f3::<>, df3::<>, (x, dx_0, y, dret)) } enum Foo { Reverse, } use Foo::Reverse; #[rustc_autodiff] -#[inline(never)] pub fn f4(x: f32) { ::core::panicking::panic("not implemented") } #[rustc_autodiff(Reverse, 1, Const, None)] -#[inline(never)] pub fn df4(x: f32) { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f4(x)); - ::core::hint::black_box(()); + ::core::intrinsics::enzyme_autodiff(f4::<>, df4::<>, (x,)) } #[rustc_autodiff] -#[inline(never)] pub fn f5(x: *const f32, y: &f32) { ::core::panicking::panic("not implemented") } #[rustc_autodiff(Reverse, 1, DuplicatedOnly, Duplicated, None)] -#[inline(never)] pub unsafe fn df5(x: *const f32, dx_0: *mut f32, y: &f32, dy_0: &mut f32) { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(f5(x, y)); - ::core::hint::black_box((dx_0, dy_0)); + ::core::intrinsics::enzyme_autodiff(f5::<>, df5::<>, (x, dx_0, y, dy_0)) } fn main() {} diff --git a/tests/pretty/autodiff/autodiff_reverse.rs b/tests/pretty/autodiff/autodiff_reverse.rs index d37e5e3eb4cec..c50b81d7780d0 100644 --- a/tests/pretty/autodiff/autodiff_reverse.rs +++ b/tests/pretty/autodiff/autodiff_reverse.rs @@ -23,7 +23,9 @@ pub fn f3(x: &[f64], y: f64) -> f64 { unimplemented!() } -enum Foo { Reverse } +enum Foo { + Reverse, +} use Foo::Reverse; // What happens if we already have Reverse in type (enum variant decl) and value (enum variant // constructor) namespace? > It's expected to work normally. diff --git a/tests/pretty/autodiff/inherent_impl.pp b/tests/pretty/autodiff/inherent_impl.pp index d18061b2dbdef..36ff402f8bcca 100644 --- a/tests/pretty/autodiff/inherent_impl.pp +++ b/tests/pretty/autodiff/inherent_impl.pp @@ -26,16 +26,12 @@ impl MyTrait for Foo { #[rustc_autodiff] - #[inline(never)] fn f(&self, x: f64) -> f64 { self.a * 0.25 * (x * x - 1.0 - 2.0 * x.ln()) } #[rustc_autodiff(Reverse, 1, Const, Active, Active)] - #[inline(never)] fn df(&self, x: f64, dret: f64) -> (f64, f64) { - unsafe { asm!("NOP", options(pure, nomem)); }; - ::core::hint::black_box(self.f(x)); - ::core::hint::black_box((dret,)); - ::core::hint::black_box((self.f(x), f64::default())) + ::core::intrinsics::enzyme_autodiff(Self::f::<>, Self::df::<>, + (self, x, dret)) } } diff --git a/triagebot.toml b/triagebot.toml index 168815465b614..8ad448981bab7 100644 --- a/triagebot.toml +++ b/triagebot.toml @@ -282,7 +282,6 @@ trigger_files = [ "src/tools/enzyme", "src/doc/unstable-book/src/compiler-flags/autodiff.md", "compiler/rustc_ast/src/expand/autodiff_attrs.rs", - "compiler/rustc_monomorphize/src/partitioning/autodiff.rs", "compiler/rustc_codegen_llvm/src/builder/autodiff.rs", "compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs", ] @@ -1278,8 +1277,6 @@ cc = ["@ZuseZ4"] cc = ["@ZuseZ4"] [mentions."compiler/rustc_builtin_macros/src/autodiff.rs"] cc = ["@ZuseZ4"] -[mentions."compiler/rustc_monomorphize/src/partitioning/autodiff.rs"] -cc = ["@ZuseZ4"] [mentions."compiler/rustc_codegen_llvm/src/builder/autodiff.rs"] cc = ["@ZuseZ4"] [mentions."compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs"]