diff --git a/typify-impl/src/convert.rs b/typify-impl/src/convert.rs
index 938983be..2964e577 100644
--- a/typify-impl/src/convert.rs
+++ b/typify-impl/src/convert.rs
@@ -1675,58 +1675,6 @@ mod tests {
assert_eq!(type_space.iter_types().count(), 4);
}
- #[test]
- fn test_trivial_cycle() {
- #[derive(JsonSchema, Schema)]
- #[allow(dead_code)]
- struct A {
- a: Box,
- }
-
- validate_output::();
- }
-
- #[test]
- fn test_optional_trivial_cycle() {
- #[derive(JsonSchema, Schema)]
- #[allow(dead_code)]
- struct A {
- a: Option>,
- }
-
- validate_output::();
- }
-
- #[test]
- fn test_enum_trivial_cycles() {
- #[derive(JsonSchema, Schema)]
- #[allow(dead_code)]
- enum A {
- Variant0(u64),
- Variant1 {
- a: u64,
- b: Vec,
- rop: Option>,
- },
- Variant2 {
- a: Box,
- },
- Variant3(u64, Box),
- Variant4(Option>, String),
- }
-
- validate_output::();
- }
-
- #[test]
- fn test_newtype_trivial_cycle() {
- #[derive(JsonSchema, Schema)]
- #[allow(dead_code)]
- struct A(Box);
-
- validate_output::();
- }
-
#[test]
fn test_basic_option_flat() {
#[derive(JsonSchema, Schema)]
diff --git a/typify-impl/src/cycles.rs b/typify-impl/src/cycles.rs
new file mode 100644
index 00000000..bda7b34a
--- /dev/null
+++ b/typify-impl/src/cycles.rs
@@ -0,0 +1,248 @@
+// Copyright 2023 Oxide Computer Company
+
+use std::{
+ collections::{BTreeMap, BTreeSet},
+ ops::Range,
+};
+
+use crate::{
+ type_entry::{
+ TypeEntry, TypeEntryDetails, TypeEntryEnum, TypeEntryNewtype, TypeEntryStruct,
+ VariantDetails,
+ },
+ TypeId, TypeSpace,
+};
+
+impl TypeSpace {
+ /// We need to root out any containment cycles, breaking them by inserting
+ /// a `Box` type. Our choice of *where* to break cycles is more arbitrary
+ /// than optimal, but is well beyond sufficient.
+ pub fn break_cycles(&mut self, range: Range) {
+ enum Node {
+ Start {
+ type_id: TypeId,
+ },
+ Processing {
+ type_id: TypeId,
+ children_ids: Vec,
+ },
+ }
+
+ let mut visited = BTreeSet::::new();
+
+ for id in range {
+ let type_id = TypeId(id);
+
+ // This isn't strictly necessary, but we'll short-circuit some work
+ // by checking this right away.
+ if visited.contains(&type_id) {
+ continue;
+ }
+
+ let mut active = BTreeSet::::new();
+ let mut stack = Vec::::new();
+
+ active.insert(type_id.clone());
+ stack.push(Node::Start { type_id });
+
+ while let Some(top) = stack.last_mut() {
+ match top {
+ // Skip right to the end since we've already seen this type.
+ Node::Start { type_id } if visited.contains(type_id) => {
+ assert!(active.contains(type_id));
+
+ let type_id = type_id.clone();
+ *top = Node::Processing {
+ type_id,
+ children_ids: Vec::new(),
+ };
+ }
+
+ // Break any immediate cycles and queue up this type for
+ // descent into its child types.
+ Node::Start { type_id } => {
+ assert!(active.contains(type_id));
+
+ visited.insert(type_id.clone());
+
+ // Determine which child types form cycles--and
+ // therefore need to be snipped--and the rest--into
+ // which we should descend. We make this its own block
+ // to clarify the lifetime of the exclusive reference
+ // to the type. We don't really *need* to have an
+ // exclusive reference here, but there's no point in
+ // writing `get_child_ids` again for shared references.
+ let (snip, descend) = {
+ let type_entry = self.id_to_entry.get_mut(type_id).unwrap();
+
+ let child_ids = get_child_ids(type_entry)
+ .into_iter()
+ .map(|child_id| child_id.clone());
+
+ // If the child type is in active then we've found
+ // a cycle (otherwise we'll descend).
+ child_ids.partition::, _>(|child_id| active.contains(child_id))
+ };
+
+ // Note that while `snip` might contain duplicates,
+ // `id_to_box` is idempotent insofar as the same input
+ // TypeId will result in the same output TypeId. Ergo
+ // the resulting pairs from which we construct the
+ // mapping would contain exact duplicates; it would not
+ // contain two values associated with the same key.
+ let replace = snip
+ .into_iter()
+ .map(|type_id| {
+ let box_id = self.id_to_box(&type_id);
+
+ (type_id, box_id)
+ })
+ .collect::>();
+
+ // Break any cycles by reassigning the child type to a box.
+ let type_entry = self.id_to_entry.get_mut(type_id).unwrap();
+ let child_ids = get_child_ids(type_entry);
+ for child_id in child_ids {
+ if let Some(replace_id) = replace.get(child_id) {
+ *child_id = replace_id.clone();
+ }
+ }
+
+ // Descend into child types.
+ let node = Node::Processing {
+ type_id: type_id.clone(),
+ children_ids: descend,
+ };
+ *top = node;
+ }
+
+ // If there are children left, push the next child onto the
+ // stack. If there are none left, pop this type.
+ Node::Processing {
+ type_id,
+ children_ids,
+ } => {
+ if let Some(type_id) = children_ids.pop() {
+ // Descend into the next child node.
+ active.insert(type_id.clone());
+ stack.push(Node::Start { type_id });
+ } else {
+ // All done; remove the item from the active list
+ // and stack.
+ active.remove(type_id);
+ let _ = stack.pop();
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+/// For types that could potentially participate in a cycle, return a list of
+/// mutable references to the child types.
+fn get_child_ids(type_entry: &mut TypeEntry) -> Vec<&mut TypeId> {
+ match &mut type_entry.details {
+ TypeEntryDetails::Enum(TypeEntryEnum { variants, .. }) => variants
+ .iter_mut()
+ .flat_map(|variant| match &mut variant.details {
+ VariantDetails::Simple => Vec::new(),
+ VariantDetails::Item(type_id) => vec![type_id],
+ VariantDetails::Tuple(type_ids) => type_ids.iter_mut().collect(),
+ VariantDetails::Struct(properties) => properties
+ .iter_mut()
+ .map(|prop| &mut prop.type_id)
+ .collect(),
+ })
+ .collect::>(),
+
+ TypeEntryDetails::Struct(TypeEntryStruct { properties, .. }) => properties
+ .iter_mut()
+ .map(|prop| &mut prop.type_id)
+ .collect(),
+
+ TypeEntryDetails::Newtype(TypeEntryNewtype { type_id, .. }) => {
+ vec![type_id]
+ }
+
+ // Unnamed types that can participate in containment cycles.
+ TypeEntryDetails::Option(type_id) => vec![type_id],
+ TypeEntryDetails::Array(type_id, _) => vec![type_id],
+ TypeEntryDetails::Tuple(type_ids) => type_ids.iter_mut().collect(),
+
+ _ => Vec::new(),
+ }
+}
+
+#[cfg(test)]
+mod tests {
+ use schema::Schema;
+ use schemars::JsonSchema;
+
+ use crate::test_util::validate_output;
+
+ #[test]
+ fn test_trivial_cycle() {
+ #[derive(JsonSchema, Schema)]
+ #[allow(dead_code)]
+ struct A {
+ a: Box,
+ }
+
+ validate_output::();
+ }
+
+ #[test]
+ fn test_optional_trivial_cycle() {
+ #[derive(JsonSchema, Schema)]
+ #[allow(dead_code)]
+ struct A {
+ a: Option>,
+ }
+
+ validate_output::();
+ }
+
+ #[test]
+ fn test_enum_trivial_cycles() {
+ #[derive(JsonSchema, Schema)]
+ #[allow(dead_code)]
+ enum A {
+ Variant0(u64),
+ Variant1 {
+ a: u64,
+ b: Vec,
+ rop: Option>,
+ },
+ Variant2 {
+ a: Box,
+ },
+ Variant3(u64, Box),
+ Variant4(Option>, String),
+ }
+
+ validate_output::();
+ }
+
+ #[test]
+ fn test_newtype_trivial_cycle() {
+ #[derive(JsonSchema, Schema)]
+ #[allow(dead_code)]
+ struct A(Box);
+
+ validate_output::();
+ }
+
+ #[test]
+ fn test_abab_cycle() {
+ #[derive(JsonSchema, Schema)]
+ #[allow(dead_code)]
+ struct A(B);
+
+ #[derive(JsonSchema, Schema)]
+ #[allow(dead_code)]
+ struct B(Box);
+
+ validate_output::();
+ }
+}
diff --git a/typify-impl/src/lib.rs b/typify-impl/src/lib.rs
index e8bad173..5fc9f82b 100644
--- a/typify-impl/src/lib.rs
+++ b/typify-impl/src/lib.rs
@@ -11,7 +11,7 @@ use schemars::schema::{Metadata, RootSchema, Schema};
use thiserror::Error;
use type_entry::{
StructPropertyState, TypeEntry, TypeEntryDetails, TypeEntryNative, TypeEntryNewtype,
- VariantDetails, WrappedValue,
+ WrappedValue,
};
use crate::util::{sanitize, Case};
@@ -21,6 +21,7 @@ mod test_util;
mod conversions;
mod convert;
+mod cycles;
mod defaults;
mod enums;
mod output;
@@ -446,33 +447,9 @@ impl TypeSpace {
}
}
- // Now that all references have been processed, we can do some
- // additional validation and processing.
- for index in 0..def_len {
- // This is slightly inefficient, but we make a copy of the type in
- // order to manipulate it without holding a reference on self.
- let type_id = TypeId(base_id + index);
- let mut type_entry = self.id_to_entry.get(&type_id).unwrap().clone();
-
- // TODO compute appropriate derives, taking care to account for
- // dependency cycles. Currently we're using a more minimal--safe--
- // set of derives than we might otherwise. This notably prevents us
- // from using a HashSet or BTreeSet type where we might like to.
-
- // Once all ref types are in, look for containment cycles that we
- // need to break with a Box. Note that we unconditionally
- // replace the type entry at the given ID regardless of whether the
- // type changes.
-
- // TODO: we've declared box_id here to avoid allocating it in the
- // ID space twice, but the dedup logic in assign_type() should
- // already address this. There's room to simplify here...
- let mut box_id = None;
- self.break_trivial_cyclic_refs(&type_id, &mut type_entry, &mut box_id);
-
- // Overwrite the entry regardless of whether we modified it.
- self.id_to_entry.insert(type_id, type_entry);
- }
+ // Eliminate cycles. It's sufficient to only start from referenced
+ // types as a reference is required to make a cycle.
+ self.break_cycles(base_id..base_id + def_len);
// Finalize all created types.
for index in base_id..self.next_id {
@@ -528,110 +505,6 @@ impl TypeSpace {
Ok(())
}
- /// If a type refers to itself, this creates a cycle that will eventually
- /// be emit as a Rust struct that cannot be constructed. Break those cycles
- /// here.
- ///
- /// While we aren't yet handling the general case of type containment
- /// cycles, it's not that bad to look at trivial cycles such as:
- ///
- /// 1) A type referring to itself: A -> A
- /// 2) A type optionally referring to itself: A -> Option
- /// 3) An enum variant referring to itself, either optionally or directly
- ///
- /// TODO currently only trivial cycles are broken. A more generic solution
- /// may be required, but it may also a point to ask oneself why such a
- /// complicated type is required :) A generic solution is difficult because
- /// certain cycles introduce a question of *where* to Box to break the
- /// cycle, and there's no one answer to this.
- fn check_for_cyclic_ref(
- &mut self,
- parent_type_id: &TypeId,
- child_type_id: &mut TypeId,
- box_id: &mut Option,
- ) {
- if *child_type_id == *parent_type_id {
- *child_type_id = box_id
- .get_or_insert_with(|| self.id_to_box(parent_type_id))
- .clone();
- } else {
- let mut child_type_entry = self.id_to_entry.get_mut(child_type_id).unwrap().clone();
-
- if let TypeEntryDetails::Option(option_type_id) = &mut child_type_entry.details {
- if *option_type_id == *parent_type_id {
- *option_type_id = box_id
- .get_or_insert_with(|| self.id_to_box(parent_type_id))
- .clone();
- }
- }
-
- let _ = self
- .id_to_entry
- .insert(child_type_id.clone(), child_type_entry);
- }
- }
-
- fn break_trivial_cyclic_refs(
- &mut self,
- parent_type_id: &TypeId,
- type_entry: &mut TypeEntry,
- box_id: &mut Option,
- ) {
- match &mut type_entry.details {
- // Look for the case where a struct property refers to the parent
- // type
- TypeEntryDetails::Struct(s) => {
- for prop in &mut s.properties {
- self.check_for_cyclic_ref(parent_type_id, &mut prop.type_id, box_id);
- }
- }
-
- // Look for the cases where an enum variant refers to the parent
- // type
- TypeEntryDetails::Enum(type_entry_enum) => {
- for variant in &mut type_entry_enum.variants {
- match &mut variant.details {
- // Simple variants will not refer to anything
- VariantDetails::Simple => {}
- // Look for a single-item tuple that refers to the
- // parent type.
- VariantDetails::Item(item_type_id) => {
- self.check_for_cyclic_ref(parent_type_id, item_type_id, box_id);
- }
- // Look for a tuple entry that refers to the parent
- // type.
- VariantDetails::Tuple(vec_type_id) => {
- for tuple_type_id in vec_type_id {
- self.check_for_cyclic_ref(parent_type_id, tuple_type_id, box_id);
- }
- }
- // Look for a struct property that refers to the parent
- // type.
- VariantDetails::Struct(vec_struct_property) => {
- for struct_property in vec_struct_property {
- let vec_type_id = &mut struct_property.type_id;
- self.check_for_cyclic_ref(parent_type_id, vec_type_id, box_id);
- }
- }
- }
- }
- }
-
- // Look for cases where a newtype refers to a parent type
- TypeEntryDetails::Newtype(new_type_entry) => {
- self.check_for_cyclic_ref(parent_type_id, &mut new_type_entry.type_id, box_id);
- }
-
- // Containers that can be size 0 are *not* cyclic references for that type
- TypeEntryDetails::Vec(_) => {}
- TypeEntryDetails::Set(_) => {}
- TypeEntryDetails::Map(..) => {}
-
- // Everything else can be ignored
- _ => {}
- }
- }
-
/// Add a new type and return a type identifier that may be used in
/// function signatures or embedded within other types.
pub fn add_type(&mut self, schema: &Schema) -> Result {
diff --git a/typify-impl/tests/vega.out b/typify-impl/tests/vega.out
index 6d831c9e..f4e7da38 100644
--- a/typify-impl/tests/vega.out
+++ b/typify-impl/tests/vega.out
@@ -22004,7 +22004,7 @@ impl From for NumberModifiersBand {
#[serde(untagged)]
pub enum NumberModifiersExponent {
Variant0(f64),
- Variant1(NumberValue),
+ Variant1(Box),
}
impl From<&NumberModifiersExponent> for NumberModifiersExponent {
fn from(value: &NumberModifiersExponent) -> Self {
@@ -22016,8 +22016,8 @@ impl From for NumberModifiersExponent {
Self::Variant0(value)
}
}
-impl From for NumberModifiersExponent {
- fn from(value: NumberValue) -> Self {
+impl From> for NumberModifiersExponent {
+ fn from(value: Box) -> Self {
Self::Variant1(value)
}
}
@@ -22025,7 +22025,7 @@ impl From for NumberModifiersExponent {
#[serde(untagged)]
pub enum NumberModifiersMult {
Variant0(f64),
- Variant1(NumberValue),
+ Variant1(Box),
}
impl From<&NumberModifiersMult> for NumberModifiersMult {
fn from(value: &NumberModifiersMult) -> Self {
@@ -22037,8 +22037,8 @@ impl From for NumberModifiersMult {
Self::Variant0(value)
}
}
-impl From for NumberModifiersMult {
- fn from(value: NumberValue) -> Self {
+impl From> for NumberModifiersMult {
+ fn from(value: Box) -> Self {
Self::Variant1(value)
}
}
@@ -22046,7 +22046,7 @@ impl From for NumberModifiersMult {
#[serde(untagged)]
pub enum NumberModifiersOffset {
Variant0(f64),
- Variant1(NumberValue),
+ Variant1(Box),
}
impl From<&NumberModifiersOffset> for NumberModifiersOffset {
fn from(value: &NumberModifiersOffset) -> Self {
@@ -22058,8 +22058,8 @@ impl From for NumberModifiersOffset {
Self::Variant0(value)
}
}
-impl From for NumberModifiersOffset {
- fn from(value: NumberValue) -> Self {
+impl From> for NumberModifiersOffset {
+ fn from(value: Box) -> Self {
Self::Variant1(value)
}
}
@@ -31414,7 +31414,7 @@ impl From<&Stream> for Stream {
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct StreamSubtype0 {
#[serde(default, skip_serializing_if = "Option::is_none")]
- pub between: Option<[Stream; 2usize]>,
+ pub between: Option<[Box; 2usize]>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub consume: Option,
#[serde(default, skip_serializing_if = "Option::is_none")]
@@ -31464,7 +31464,7 @@ pub enum StreamSubtype1 {
type_: String,
},
Variant1 {
- stream: Stream,
+ stream: Box,
},
Variant2 {
merge: Vec,