Skip to content

Commit ff2de10

Browse files
refactor(dataset): replace free-form attributes with typed structs
Introduce DatasetAttributes and PathogenAttributes structs with typed fields for recognized keys (name, reference name, reference accession; DatasetAttributes additionally has deprecated and experimental), plus a #[serde(flatten)] map of additional custom attributes for extensibility.
1 parent 211d265 commit ff2de10

File tree

4 files changed

+101
-41
lines changed

4 files changed

+101
-41
lines changed

packages/nextclade-cli/src/dataset/dataset_table.rs

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -35,13 +35,7 @@ pub fn format_dataset_table(filtered: &[Dataset]) -> String {
3535

3636
let attrs = attributes
3737
.iter()
38-
.map(|(key, val)| {
39-
format!(
40-
"{}={}",
41-
surround_with_quotes(key),
42-
surround_with_quotes(val.to_string())
43-
)
44-
})
38+
.map(|(key, val)| format!("{}={}", surround_with_quotes(key), surround_with_quotes(val)))
4539
.join("\n");
4640

4741
let versions = dataset.versions.iter().map(|ver| &ver.tag).join("\n");

packages/nextclade/src/analyze/virus_properties.rs

Lines changed: 35 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -29,15 +29,41 @@ use validator::Validate;
2929
const PATHOGEN_JSON_SCHEMA_VERSION_FROM: &str = "3.0.0";
3030
const PATHOGEN_JSON_SCHEMA_VERSION_TO: &str = "3.0.0";
3131

32+
/// Pathogen metadata attributes with recognized keys and extensibility
33+
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize, schemars::JsonSchema)]
34+
pub struct PathogenAttributes {
35+
/// Human-readable dataset name
36+
#[serde(default, skip_serializing_if = "Option::is_none")]
37+
pub name: Option<String>,
38+
39+
/// Name of the reference sequence
40+
#[serde(rename = "reference name", default, skip_serializing_if = "Option::is_none")]
41+
pub reference_name: Option<String>,
42+
43+
/// Accession number of the reference sequence
44+
#[serde(rename = "reference accession", default, skip_serializing_if = "Option::is_none")]
45+
pub reference_accession: Option<String>,
46+
47+
/// Additional custom attributes
48+
#[serde(flatten)]
49+
pub other: BTreeMap<String, AnyType>,
50+
}
51+
52+
impl PathogenAttributes {
53+
pub fn is_default(&self) -> bool {
54+
self == &Self::default()
55+
}
56+
}
57+
3258
/// pathogen.json dataset file. Contains external configuration and data specific for a particular pathogen.
3359
#[derive(Clone, Default, Debug, Eq, PartialEq, Serialize, Deserialize, schemars::JsonSchema)]
3460
#[serde(rename_all = "camelCase")]
3561
#[schemars(title = "PathogenJson", example = "VirusProperties::example")]
3662
pub struct VirusProperties {
3763
pub schema_version: String,
3864

39-
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
40-
pub attributes: BTreeMap<String, AnyType>,
65+
#[serde(default, skip_serializing_if = "PathogenAttributes::is_default")]
66+
pub attributes: PathogenAttributes,
4167

4268
#[serde(default, skip_serializing_if = "Vec::is_empty")]
4369
pub shortcuts: Vec<String>,
@@ -325,11 +351,13 @@ impl VirusProperties {
325351
pub fn example() -> Self {
326352
Self {
327353
schema_version: o!("3.0.0"),
328-
attributes: btreemap! {
329-
o!("name") => AnyType::String(o!("Influenza A H3N2 HA")),
330-
o!("segment") => AnyType::String(o!("ha")),
331-
o!("reference accession") => AnyType::String(o!("CY163680")),
332-
o!("reference name") => AnyType::String(o!("A/Wisconsin/67/2005-egg")),
354+
attributes: PathogenAttributes {
355+
name: Some(o!("Influenza A H3N2 HA")),
356+
reference_name: Some(o!("A/Wisconsin/67/2005-egg")),
357+
reference_accession: Some(o!("CY163680")),
358+
other: btreemap! {
359+
o!("segment") => AnyType::String(o!("ha")),
360+
},
333361
},
334362
shortcuts: vec_of_owned!["flu_h3n2_ha_broad", "nextstrain/flu/h3n2/ha/wisconsin-67-2005"],
335363
meta: DatasetMeta::default(),

packages/nextclade/src/io/dataset.rs

Lines changed: 62 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,61 @@ use std::cmp::Ordering;
1212
use std::collections::BTreeMap;
1313
use std::ops::{Deref, DerefMut};
1414

15+
/// Dataset metadata attributes with recognized keys and extensibility
16+
#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize, JsonSchema)]
17+
pub struct DatasetAttributes {
18+
/// Human-readable dataset name
19+
#[serde(default, skip_serializing_if = "Option::is_none")]
20+
pub name: Option<String>,
21+
22+
/// Name of the reference sequence
23+
#[serde(rename = "reference name", default, skip_serializing_if = "Option::is_none")]
24+
pub reference_name: Option<String>,
25+
26+
/// Accession number of the reference sequence
27+
#[serde(rename = "reference accession", default, skip_serializing_if = "Option::is_none")]
28+
pub reference_accession: Option<String>,
29+
30+
/// If true, dataset is deprecated and excluded from listings by default.
31+
/// Authors mark a dataset as deprecated to indicate it will no longer be updated or supported.
32+
/// Use `--include-deprecated` CLI flag to show deprecated datasets.
33+
#[serde(default, skip_serializing_if = "Option::is_none")]
34+
pub deprecated: Option<bool>,
35+
36+
/// If true, dataset is experimental and excluded with `--no-experimental` CLI flag.
37+
/// Authors mark a dataset as experimental when development is still in progress,
38+
/// or if the dataset is incomplete or of lower quality than usual. Use at own risk.
39+
#[serde(default, skip_serializing_if = "Option::is_none")]
40+
pub experimental: Option<bool>,
41+
42+
/// Additional custom attributes
43+
#[serde(flatten)]
44+
pub other: BTreeMap<String, AnyType>,
45+
}
46+
47+
impl DatasetAttributes {
48+
pub fn is_default(&self) -> bool {
49+
self == &Self::default()
50+
}
51+
52+
/// Iterate over all attributes as (key, value) pairs for display
53+
pub fn iter(&self) -> impl Iterator<Item = (&str, String)> {
54+
let typed = [
55+
self.name.as_ref().map(|v| ("name", v.clone())),
56+
self.reference_name.as_ref().map(|v| ("reference name", v.clone())),
57+
self.reference_accession.as_ref().map(|v| ("reference accession", v.clone())),
58+
self.deprecated.map(|v| ("deprecated", v.to_string())),
59+
self.experimental.map(|v| ("experimental", v.to_string())),
60+
]
61+
.into_iter()
62+
.flatten();
63+
64+
let other = self.other.iter().map(|(k, v)| (k.as_str(), v.to_string()));
65+
66+
chain!(typed, other)
67+
}
68+
}
69+
1570
const INDEX_JSON_SCHEMA_VERSION_FROM: &str = "3.0.0";
1671
const INDEX_JSON_SCHEMA_VERSION_TO: &str = "3.0.0";
1772

@@ -114,8 +169,8 @@ pub struct Dataset {
114169
#[serde(default, skip_serializing_if = "Vec::is_empty")]
115170
pub shortcuts: Vec<String>,
116171

117-
#[serde(default, skip_serializing_if = "BTreeMap::is_empty")]
118-
pub attributes: BTreeMap<String, AnyType>,
172+
#[serde(default, skip_serializing_if = "DatasetAttributes::is_default")]
173+
pub attributes: DatasetAttributes,
119174

120175
#[serde(default, skip_serializing_if = "DatasetMeta::is_default")]
121176
pub meta: DatasetMeta,
@@ -148,7 +203,7 @@ impl Dataset {
148203
}
149204

150205
pub fn name(&self) -> Option<&str> {
151-
self.attributes.get("name").and_then(AnyType::as_str_maybe)
206+
self.attributes.name.as_deref()
152207
}
153208

154209
pub fn shortcuts(&self) -> impl Iterator<Item = &str> {
@@ -175,30 +230,19 @@ impl Dataset {
175230
}
176231

177232
pub fn ref_name(&self) -> Option<&str> {
178-
self.attributes.get("reference name").and_then(AnyType::as_str_maybe)
233+
self.attributes.reference_name.as_deref()
179234
}
180235

181236
pub fn ref_accession(&self) -> Option<&str> {
182-
self
183-
.attributes
184-
.get("reference accession")
185-
.and_then(AnyType::as_str_maybe)
237+
self.attributes.reference_accession.as_deref()
186238
}
187239

188240
pub fn deprecated(&self) -> bool {
189-
self
190-
.attributes
191-
.get("deprecated")
192-
.and_then(AnyType::as_bool_maybe)
193-
.unwrap_or(false)
241+
self.attributes.deprecated.unwrap_or(false)
194242
}
195243

196244
pub fn experimental(&self) -> bool {
197-
self
198-
.attributes
199-
.get("experimental")
200-
.and_then(AnyType::as_bool_maybe)
201-
.unwrap_or(false)
245+
self.attributes.experimental.unwrap_or(false)
202246
}
203247

204248
pub fn is_community(&self) -> bool {

packages/nextclade/src/run/nextclade_wasm.rs

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@ use crate::tree::tree::{AuspiceGraph, AuspiceRefNodesDesc, AuspiceTree, CladeNod
2121
use crate::tree::tree_builder::graph_attach_new_nodes_in_place;
2222
use crate::tree::tree_preprocess::graph_preprocess_in_place;
2323
use crate::types::outputs::NextcladeOutputs;
24-
use crate::utils::any::AnyType;
2524
use crate::utils::option::{OptionMapRefFallible, find_some};
2625
use eyre::{Report, WrapErr, eyre};
2726
use itertools::Itertools;
@@ -67,14 +66,9 @@ impl NextcladeParams {
6766
} else {
6867
let ref_name = virus_properties
6968
.attributes
70-
.get("reference name")
71-
.cloned()
72-
.unwrap_or_else(|| AnyType::String("reference".to_owned()))
73-
.as_str()
74-
.wrap_err(
75-
"When reading Auspice JSON v2 `.meta.extensions.nextclade.pathogen.attributes[\"reference name\"]`",
76-
)?
77-
.to_owned();
69+
.reference_name
70+
.clone()
71+
.unwrap_or_else(|| "reference".to_owned());
7872

7973
let ref_seq = auspice_json.root_sequence.as_ref().and_then(|root_sequence| root_sequence.get("nuc"))
8074
.ok_or_else(|| eyre!("Auspice JSON v2 is used as input dataset, but does not contain required reference sequence field (.root_sequence.nuc) and a reference sequence is not provided any other way."))?.to_owned();

0 commit comments

Comments
 (0)