diff --git a/typify-impl/src/convert.rs b/typify-impl/src/convert.rs index 938983be..2964e577 100644 --- a/typify-impl/src/convert.rs +++ b/typify-impl/src/convert.rs @@ -1675,58 +1675,6 @@ mod tests { assert_eq!(type_space.iter_types().count(), 4); } - #[test] - fn test_trivial_cycle() { - #[derive(JsonSchema, Schema)] - #[allow(dead_code)] - struct A { - a: Box, - } - - validate_output::(); - } - - #[test] - fn test_optional_trivial_cycle() { - #[derive(JsonSchema, Schema)] - #[allow(dead_code)] - struct A { - a: Option>, - } - - validate_output::(); - } - - #[test] - fn test_enum_trivial_cycles() { - #[derive(JsonSchema, Schema)] - #[allow(dead_code)] - enum A { - Variant0(u64), - Variant1 { - a: u64, - b: Vec, - rop: Option>, - }, - Variant2 { - a: Box, - }, - Variant3(u64, Box), - Variant4(Option>, String), - } - - validate_output::(); - } - - #[test] - fn test_newtype_trivial_cycle() { - #[derive(JsonSchema, Schema)] - #[allow(dead_code)] - struct A(Box); - - validate_output::(); - } - #[test] fn test_basic_option_flat() { #[derive(JsonSchema, Schema)] diff --git a/typify-impl/src/cycles.rs b/typify-impl/src/cycles.rs new file mode 100644 index 00000000..bda7b34a --- /dev/null +++ b/typify-impl/src/cycles.rs @@ -0,0 +1,248 @@ +// Copyright 2023 Oxide Computer Company + +use std::{ + collections::{BTreeMap, BTreeSet}, + ops::Range, +}; + +use crate::{ + type_entry::{ + TypeEntry, TypeEntryDetails, TypeEntryEnum, TypeEntryNewtype, TypeEntryStruct, + VariantDetails, + }, + TypeId, TypeSpace, +}; + +impl TypeSpace { + /// We need to root out any containment cycles, breaking them by inserting + /// a `Box` type. Our choice of *where* to break cycles is more arbitrary + /// than optimal, but is well beyond sufficient. + pub fn break_cycles(&mut self, range: Range) { + enum Node { + Start { + type_id: TypeId, + }, + Processing { + type_id: TypeId, + children_ids: Vec, + }, + } + + let mut visited = BTreeSet::::new(); + + for id in range { + let type_id = TypeId(id); + + // This isn't strictly necessary, but we'll short-circuit some work + // by checking this right away. + if visited.contains(&type_id) { + continue; + } + + let mut active = BTreeSet::::new(); + let mut stack = Vec::::new(); + + active.insert(type_id.clone()); + stack.push(Node::Start { type_id }); + + while let Some(top) = stack.last_mut() { + match top { + // Skip right to the end since we've already seen this type. + Node::Start { type_id } if visited.contains(type_id) => { + assert!(active.contains(type_id)); + + let type_id = type_id.clone(); + *top = Node::Processing { + type_id, + children_ids: Vec::new(), + }; + } + + // Break any immediate cycles and queue up this type for + // descent into its child types. + Node::Start { type_id } => { + assert!(active.contains(type_id)); + + visited.insert(type_id.clone()); + + // Determine which child types form cycles--and + // therefore need to be snipped--and the rest--into + // which we should descend. We make this its own block + // to clarify the lifetime of the exclusive reference + // to the type. We don't really *need* to have an + // exclusive reference here, but there's no point in + // writing `get_child_ids` again for shared references. + let (snip, descend) = { + let type_entry = self.id_to_entry.get_mut(type_id).unwrap(); + + let child_ids = get_child_ids(type_entry) + .into_iter() + .map(|child_id| child_id.clone()); + + // If the child type is in active then we've found + // a cycle (otherwise we'll descend). + child_ids.partition::, _>(|child_id| active.contains(child_id)) + }; + + // Note that while `snip` might contain duplicates, + // `id_to_box` is idempotent insofar as the same input + // TypeId will result in the same output TypeId. Ergo + // the resulting pairs from which we construct the + // mapping would contain exact duplicates; it would not + // contain two values associated with the same key. + let replace = snip + .into_iter() + .map(|type_id| { + let box_id = self.id_to_box(&type_id); + + (type_id, box_id) + }) + .collect::>(); + + // Break any cycles by reassigning the child type to a box. + let type_entry = self.id_to_entry.get_mut(type_id).unwrap(); + let child_ids = get_child_ids(type_entry); + for child_id in child_ids { + if let Some(replace_id) = replace.get(child_id) { + *child_id = replace_id.clone(); + } + } + + // Descend into child types. + let node = Node::Processing { + type_id: type_id.clone(), + children_ids: descend, + }; + *top = node; + } + + // If there are children left, push the next child onto the + // stack. If there are none left, pop this type. + Node::Processing { + type_id, + children_ids, + } => { + if let Some(type_id) = children_ids.pop() { + // Descend into the next child node. + active.insert(type_id.clone()); + stack.push(Node::Start { type_id }); + } else { + // All done; remove the item from the active list + // and stack. + active.remove(type_id); + let _ = stack.pop(); + } + } + } + } + } + } +} + +/// For types that could potentially participate in a cycle, return a list of +/// mutable references to the child types. +fn get_child_ids(type_entry: &mut TypeEntry) -> Vec<&mut TypeId> { + match &mut type_entry.details { + TypeEntryDetails::Enum(TypeEntryEnum { variants, .. }) => variants + .iter_mut() + .flat_map(|variant| match &mut variant.details { + VariantDetails::Simple => Vec::new(), + VariantDetails::Item(type_id) => vec![type_id], + VariantDetails::Tuple(type_ids) => type_ids.iter_mut().collect(), + VariantDetails::Struct(properties) => properties + .iter_mut() + .map(|prop| &mut prop.type_id) + .collect(), + }) + .collect::>(), + + TypeEntryDetails::Struct(TypeEntryStruct { properties, .. }) => properties + .iter_mut() + .map(|prop| &mut prop.type_id) + .collect(), + + TypeEntryDetails::Newtype(TypeEntryNewtype { type_id, .. }) => { + vec![type_id] + } + + // Unnamed types that can participate in containment cycles. + TypeEntryDetails::Option(type_id) => vec![type_id], + TypeEntryDetails::Array(type_id, _) => vec![type_id], + TypeEntryDetails::Tuple(type_ids) => type_ids.iter_mut().collect(), + + _ => Vec::new(), + } +} + +#[cfg(test)] +mod tests { + use schema::Schema; + use schemars::JsonSchema; + + use crate::test_util::validate_output; + + #[test] + fn test_trivial_cycle() { + #[derive(JsonSchema, Schema)] + #[allow(dead_code)] + struct A { + a: Box, + } + + validate_output::(); + } + + #[test] + fn test_optional_trivial_cycle() { + #[derive(JsonSchema, Schema)] + #[allow(dead_code)] + struct A { + a: Option>, + } + + validate_output::(); + } + + #[test] + fn test_enum_trivial_cycles() { + #[derive(JsonSchema, Schema)] + #[allow(dead_code)] + enum A { + Variant0(u64), + Variant1 { + a: u64, + b: Vec, + rop: Option>, + }, + Variant2 { + a: Box, + }, + Variant3(u64, Box), + Variant4(Option>, String), + } + + validate_output::(); + } + + #[test] + fn test_newtype_trivial_cycle() { + #[derive(JsonSchema, Schema)] + #[allow(dead_code)] + struct A(Box); + + validate_output::(); + } + + #[test] + fn test_abab_cycle() { + #[derive(JsonSchema, Schema)] + #[allow(dead_code)] + struct A(B); + + #[derive(JsonSchema, Schema)] + #[allow(dead_code)] + struct B(Box); + + validate_output::(); + } +} diff --git a/typify-impl/src/lib.rs b/typify-impl/src/lib.rs index e8bad173..5fc9f82b 100644 --- a/typify-impl/src/lib.rs +++ b/typify-impl/src/lib.rs @@ -11,7 +11,7 @@ use schemars::schema::{Metadata, RootSchema, Schema}; use thiserror::Error; use type_entry::{ StructPropertyState, TypeEntry, TypeEntryDetails, TypeEntryNative, TypeEntryNewtype, - VariantDetails, WrappedValue, + WrappedValue, }; use crate::util::{sanitize, Case}; @@ -21,6 +21,7 @@ mod test_util; mod conversions; mod convert; +mod cycles; mod defaults; mod enums; mod output; @@ -446,33 +447,9 @@ impl TypeSpace { } } - // Now that all references have been processed, we can do some - // additional validation and processing. - for index in 0..def_len { - // This is slightly inefficient, but we make a copy of the type in - // order to manipulate it without holding a reference on self. - let type_id = TypeId(base_id + index); - let mut type_entry = self.id_to_entry.get(&type_id).unwrap().clone(); - - // TODO compute appropriate derives, taking care to account for - // dependency cycles. Currently we're using a more minimal--safe-- - // set of derives than we might otherwise. This notably prevents us - // from using a HashSet or BTreeSet type where we might like to. - - // Once all ref types are in, look for containment cycles that we - // need to break with a Box. Note that we unconditionally - // replace the type entry at the given ID regardless of whether the - // type changes. - - // TODO: we've declared box_id here to avoid allocating it in the - // ID space twice, but the dedup logic in assign_type() should - // already address this. There's room to simplify here... - let mut box_id = None; - self.break_trivial_cyclic_refs(&type_id, &mut type_entry, &mut box_id); - - // Overwrite the entry regardless of whether we modified it. - self.id_to_entry.insert(type_id, type_entry); - } + // Eliminate cycles. It's sufficient to only start from referenced + // types as a reference is required to make a cycle. + self.break_cycles(base_id..base_id + def_len); // Finalize all created types. for index in base_id..self.next_id { @@ -528,110 +505,6 @@ impl TypeSpace { Ok(()) } - /// If a type refers to itself, this creates a cycle that will eventually - /// be emit as a Rust struct that cannot be constructed. Break those cycles - /// here. - /// - /// While we aren't yet handling the general case of type containment - /// cycles, it's not that bad to look at trivial cycles such as: - /// - /// 1) A type referring to itself: A -> A - /// 2) A type optionally referring to itself: A -> Option - /// 3) An enum variant referring to itself, either optionally or directly - /// - /// TODO currently only trivial cycles are broken. A more generic solution - /// may be required, but it may also a point to ask oneself why such a - /// complicated type is required :) A generic solution is difficult because - /// certain cycles introduce a question of *where* to Box to break the - /// cycle, and there's no one answer to this. - fn check_for_cyclic_ref( - &mut self, - parent_type_id: &TypeId, - child_type_id: &mut TypeId, - box_id: &mut Option, - ) { - if *child_type_id == *parent_type_id { - *child_type_id = box_id - .get_or_insert_with(|| self.id_to_box(parent_type_id)) - .clone(); - } else { - let mut child_type_entry = self.id_to_entry.get_mut(child_type_id).unwrap().clone(); - - if let TypeEntryDetails::Option(option_type_id) = &mut child_type_entry.details { - if *option_type_id == *parent_type_id { - *option_type_id = box_id - .get_or_insert_with(|| self.id_to_box(parent_type_id)) - .clone(); - } - } - - let _ = self - .id_to_entry - .insert(child_type_id.clone(), child_type_entry); - } - } - - fn break_trivial_cyclic_refs( - &mut self, - parent_type_id: &TypeId, - type_entry: &mut TypeEntry, - box_id: &mut Option, - ) { - match &mut type_entry.details { - // Look for the case where a struct property refers to the parent - // type - TypeEntryDetails::Struct(s) => { - for prop in &mut s.properties { - self.check_for_cyclic_ref(parent_type_id, &mut prop.type_id, box_id); - } - } - - // Look for the cases where an enum variant refers to the parent - // type - TypeEntryDetails::Enum(type_entry_enum) => { - for variant in &mut type_entry_enum.variants { - match &mut variant.details { - // Simple variants will not refer to anything - VariantDetails::Simple => {} - // Look for a single-item tuple that refers to the - // parent type. - VariantDetails::Item(item_type_id) => { - self.check_for_cyclic_ref(parent_type_id, item_type_id, box_id); - } - // Look for a tuple entry that refers to the parent - // type. - VariantDetails::Tuple(vec_type_id) => { - for tuple_type_id in vec_type_id { - self.check_for_cyclic_ref(parent_type_id, tuple_type_id, box_id); - } - } - // Look for a struct property that refers to the parent - // type. - VariantDetails::Struct(vec_struct_property) => { - for struct_property in vec_struct_property { - let vec_type_id = &mut struct_property.type_id; - self.check_for_cyclic_ref(parent_type_id, vec_type_id, box_id); - } - } - } - } - } - - // Look for cases where a newtype refers to a parent type - TypeEntryDetails::Newtype(new_type_entry) => { - self.check_for_cyclic_ref(parent_type_id, &mut new_type_entry.type_id, box_id); - } - - // Containers that can be size 0 are *not* cyclic references for that type - TypeEntryDetails::Vec(_) => {} - TypeEntryDetails::Set(_) => {} - TypeEntryDetails::Map(..) => {} - - // Everything else can be ignored - _ => {} - } - } - /// Add a new type and return a type identifier that may be used in /// function signatures or embedded within other types. pub fn add_type(&mut self, schema: &Schema) -> Result { diff --git a/typify-impl/tests/vega.out b/typify-impl/tests/vega.out index 6d831c9e..f4e7da38 100644 --- a/typify-impl/tests/vega.out +++ b/typify-impl/tests/vega.out @@ -22004,7 +22004,7 @@ impl From for NumberModifiersBand { #[serde(untagged)] pub enum NumberModifiersExponent { Variant0(f64), - Variant1(NumberValue), + Variant1(Box), } impl From<&NumberModifiersExponent> for NumberModifiersExponent { fn from(value: &NumberModifiersExponent) -> Self { @@ -22016,8 +22016,8 @@ impl From for NumberModifiersExponent { Self::Variant0(value) } } -impl From for NumberModifiersExponent { - fn from(value: NumberValue) -> Self { +impl From> for NumberModifiersExponent { + fn from(value: Box) -> Self { Self::Variant1(value) } } @@ -22025,7 +22025,7 @@ impl From for NumberModifiersExponent { #[serde(untagged)] pub enum NumberModifiersMult { Variant0(f64), - Variant1(NumberValue), + Variant1(Box), } impl From<&NumberModifiersMult> for NumberModifiersMult { fn from(value: &NumberModifiersMult) -> Self { @@ -22037,8 +22037,8 @@ impl From for NumberModifiersMult { Self::Variant0(value) } } -impl From for NumberModifiersMult { - fn from(value: NumberValue) -> Self { +impl From> for NumberModifiersMult { + fn from(value: Box) -> Self { Self::Variant1(value) } } @@ -22046,7 +22046,7 @@ impl From for NumberModifiersMult { #[serde(untagged)] pub enum NumberModifiersOffset { Variant0(f64), - Variant1(NumberValue), + Variant1(Box), } impl From<&NumberModifiersOffset> for NumberModifiersOffset { fn from(value: &NumberModifiersOffset) -> Self { @@ -22058,8 +22058,8 @@ impl From for NumberModifiersOffset { Self::Variant0(value) } } -impl From for NumberModifiersOffset { - fn from(value: NumberValue) -> Self { +impl From> for NumberModifiersOffset { + fn from(value: Box) -> Self { Self::Variant1(value) } } @@ -31414,7 +31414,7 @@ impl From<&Stream> for Stream { #[derive(Clone, Debug, Deserialize, Serialize)] pub struct StreamSubtype0 { #[serde(default, skip_serializing_if = "Option::is_none")] - pub between: Option<[Stream; 2usize]>, + pub between: Option<[Box; 2usize]>, #[serde(default, skip_serializing_if = "Option::is_none")] pub consume: Option, #[serde(default, skip_serializing_if = "Option::is_none")] @@ -31464,7 +31464,7 @@ pub enum StreamSubtype1 { type_: String, }, Variant1 { - stream: Stream, + stream: Box, }, Variant2 { merge: Vec,