Skip to content

Commit f106408

Browse files
authored
fix: update noodles to 0.97 and annonars to 0.42.5 (#735)
1 parent 263cabd commit f106408

20 files changed

+1660
-1668
lines changed

Cargo.lock

Lines changed: 450 additions & 377 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ path = "src/main.rs"
2727

2828
[dependencies]
2929
actix-web = "4.10"
30-
annonars = "0.42.4"
30+
annonars = "0.42.5"
3131
anyhow = "1.0"
3232
async-compression = { version = "0.4", features = ["tokio", "gzip"] }
3333
bio = "2.0.3"
@@ -86,7 +86,7 @@ version = "0.3.3"
8686
optional = true
8787

8888
[dependencies.noodles]
89-
version = "0.77.0"
89+
version = "0.97.0"
9090
features = ["async", "bgzf", "core", "vcf", "bcf", "csi", "fasta", "tabix"]
9191

9292
[build-dependencies]

src/annotate/seqvars/ann.rs

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1001,7 +1001,6 @@ impl std::fmt::Display for AnnField {
10011001

10021002
#[cfg(test)]
10031003
mod test {
1004-
use itertools::Itertools;
10051004
use pretty_assertions::assert_eq;
10061005
use std::str::FromStr;
10071006

src/annotate/seqvars/mod.rs

Lines changed: 1 addition & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1669,28 +1669,6 @@ pub struct ClinvarAnnotator {
16691669
db: DBWithThreadMode<rocksdb::MultiThreaded>,
16701670
}
16711671

1672-
/// Helper code for percent encoding of strings.
1673-
///
1674-
/// cf. https://github.com/varfish-org/varfish-server-worker/issues/485
1675-
mod vcf_encoding {
1676-
use percent_encoding::{utf8_percent_encode, AsciiSet, PercentEncode, CONTROLS};
1677-
1678-
// § 1.2 "Character encoding, non-printable characters and characters with special meaning" (2023-08-23)
1679-
const PERCENT_ENCODE_SET: &AsciiSet = &CONTROLS
1680-
.add(b':')
1681-
.add(b';')
1682-
.add(b'=')
1683-
.add(b'%')
1684-
.add(b',')
1685-
.add(b'\r')
1686-
.add(b'\n')
1687-
.add(b'\t');
1688-
1689-
pub(super) fn percent_encode(s: &str) -> PercentEncode<'_> {
1690-
utf8_percent_encode(s, PERCENT_ENCODE_SET)
1691-
}
1692-
}
1693-
16941672
impl ClinvarAnnotator {
16951673
pub fn new(db: DBWithThreadMode<rocksdb::MultiThreaded>) -> Self {
16961674
Self { db }
@@ -1750,12 +1728,7 @@ impl ClinvarAnnotator {
17501728
Some(field::Value::Array(field::value::Array::String(
17511729
clinvar_germline_classifications
17521730
.into_iter()
1753-
.map(|value| {
1754-
// Manually encode until the following is fixed.
1755-
//
1756-
// https://github.com/varfish-org/varfish-server-worker/issues/485
1757-
Some(vcf_encoding::percent_encode(&value).to_string())
1758-
})
1731+
.map(Some)
17591732
.collect::<Vec<_>>(),
17601733
))),
17611734
);

src/annotate/seqvars/snapshots/mehari__annotate__seqvars__test__smoke_test_output_vcf.snap

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -138,4 +138,4 @@ expression: actual
138138
##bcftools_concatVersion=1.9+htslib-1.9
139139
##bcftools_concatCommand=concat --allow-overlaps -d none -o output/out.vcf.gz -O z job_out.0.d/out/tmp_0.vcf.gz job_out.1.d/out/tmp_1.vcf.gz job_out.2.d/out/tmp_2.vcf.gz job_out.3.d/out/tmp_3.vcf.gz job_out.4.d/out/tmp_4.vcf.gz job_out.5.d/out/tmp_5.vcf.gz job_out.6.d/out/tmp_6.vcf.gz job_out.7.d/out/tmp_7.vcf.gz job_out.8.d/out/tmp_8.vcf.gz job_out.9.d/out/tmp_9.vcf.gz job_out.10.d/out/tmp_10.vcf.gz job_out.11.d/out/tmp_11.vcf.gz job_out.12.d/out/tmp_12.vcf.gz job_out.13.d/out/tmp_13.vcf.gz job_out.14.d/out/tmp_14.vcf.gz job_out.15.d/out/tmp_15.vcf.gz job_out.16.d/out/tmp_16.vcf.gz job_out.17.d/out/tmp_17.vcf.gz job_out.18.d/out/tmp_18.vcf.gz job_out.19.d/out/tmp_19.vcf.gz job_out.20.d/out/tmp_20.vcf.gz job_out.21.d/out/tmp_21.vcf.gz job_out.22.d/out/tmp_22.vcf.gz job_out.23.d/out/tmp_23.vcf.gz job_out.24.d/out/tmp_24.vcf.gz job_out.25.d/out/tmp_25.vcf.gz job_out.26.d/out/tmp_26.vcf.gz job_out.27.d/out/tmp_27.vcf.gz job_out.28.d/out/tmp_28.vcf.gz job_out.29.d/out/tmp_29.vcf.gz job_out.30.d/out/tmp_30.vcf.gz job_out.31.d/out/tmp_31.vcf.gz job_out.32.d/out/tmp_32.vcf.gz job_out.33.d/out/tmp_33.vcf.gz job_out.34.d/out/tmp_34.vcf.gz job_out.35.d/out/tmp_35.vcf.gz job_out.36.d/out/tmp_36.vcf.gz job_out.37.d/out/tmp_37.vcf.gz job_out.38.d/out/tmp_38.vcf.gz job_out.39.d/out/tmp_39.vcf.gz job_out.40.d/out/tmp_40.vcf.gz job_out.41.d/out/tmp_41.vcf.gz job_out.42.d/out/tmp_42.vcf.gz job_out.43.d/out/tmp_43.vcf.gz job_out.44.d/out/tmp_44.vcf.gz job_out.45.d/out/tmp_45.vcf.gz job_out.46.d/out/tmp_46.vcf.gz job_out.47.d/out/tmp_47.vcf.gz job_out.48.d/out/tmp_48.vcf.gz job_out.49.d/out/tmp_49.vcf.gz job_out.50.d/out/tmp_50.vcf.gz job_out.51.d/out/tmp_51.vcf.gz job_out.52.d/out/tmp_52.vcf.gz job_out.53.d/out/tmp_53.vcf.gz job_out.54.d/out/tmp_54.vcf.gz job_out.55.d/out/tmp_55.vcf.gz job_out.56.d/out/tmp_56.vcf.gz job_out.57.d/out/tmp_57.vcf.gz job_out.58.d/out/tmp_58.vcf.gz job_out.59.d/out/tmp_59.vcf.gz job_out.60.d/out/tmp_60.vcf.gz job_out.61.d/out/tmp_61.vcf.gz 
job_out.62.d/out/tmp_62.vcf.gz job_out.63.d/out/tmp_63.vcf.gz job_out.64.d/out/tmp_64.vcf.gz job_out.65.d/out/tmp_65.vcf.gz job_out.66.d/out/tmp_66.vcf.gz job_out.67.d/out/tmp_67.vcf.gz job_out.68.d/out/tmp_68.vcf.gz job_out.69.d/out/tmp_69.vcf.gz job_out.70.d/out/tmp_70.vcf.gz job_out.71.d/out/tmp_71.vcf.gz job_out.72.d/out/tmp_72.vcf.gz job_out.73.d/out/tmp_73.vcf.gz job_out.74.d/out/tmp_74.vcf.gz job_out.75.d/out/tmp_75.vcf.gz job_out.76.d/out/tmp_76.vcf.gz job_out.77.d/out/tmp_77.vcf.gz job_out.78.d/out/tmp_78.vcf.gz job_out.79.d/out/tmp_79.vcf.gz job_out.80.d/out/tmp_80.vcf.gz job_out.81.d/out/tmp_81.vcf.gz job_out.82.d/out/tmp_82.vcf.gz job_out.83.d/out/tmp_83.vcf.gz job_out.84.d/out/tmp_84.vcf.gz job_out.85.d/out/tmp_85.vcf.gz job_out.86.d/out/tmp_86.vcf.gz job_out.87.d/out/tmp_87.vcf.gz job_out.88.d/out/tmp_88.vcf.gz job_out.89.d/out/tmp_89.vcf.gz job_out.90.d/out/tmp_90.vcf.gz job_out.91.d/out/tmp_91.vcf.gz job_out.92.d/out/tmp_92.vcf.gz job_out.93.d/out/tmp_93.vcf.gz job_out.94.d/out/tmp_94.vcf.gz job_out.95.d/out/tmp_95.vcf.gz job_out.96.d/out/tmp_96.vcf.gz job_out.97.d/out/tmp_97.vcf.gz job_out.98.d/out/tmp_98.vcf.gz job_out.99.d/out/tmp_99.vcf.gz job_out.100.d/out/tmp_100.vcf.gz job_out.101.d/out/tmp_101.vcf.gz job_out.102.d/out/tmp_102.vcf.gz job_out.103.d/out/tmp_103.vcf.gz job_out.104.d/out/tmp_104.vcf.gz job_out.105.d/out/tmp_105.vcf.gz job_out.106.d/out/tmp_106.vcf.gz job_out.107.d/out/tmp_107.vcf.gz job_out.108.d/out/tmp_108.vcf.gz job_out.109.d/out/tmp_109.vcf.gz job_out.110.d/out/tmp_110.vcf.gz job_out.111.d/out/tmp_111.vcf.gz job_out.112.d/out/tmp_112.vcf.gz job_out.113.d/out/tmp_113.vcf.gz job_out.114.d/out/tmp_114.vcf.gz job_out.115.d/out/tmp_115.vcf.gz job_out.116.d/out/tmp_116.vcf.gz job_out.117.d/out/tmp_117.vcf.gz job_out.118.d/out/tmp_118.vcf.gz job_out.119.d/out/tmp_119.vcf.gz job_out.120.d/out/tmp_120.vcf.gz job_out.121.d/out/tmp_121.vcf.gz job_out.122.d/out/tmp_122.vcf.gz job_out.123.d/out/tmp_123.vcf.gz 
job_out.124.d/out/tmp_124.vcf.gz job_out.125.d/out/tmp_125.vcf.gz job_out.126.d/out/tmp_126.vcf.gz job_out.127.d/out/tmp_127.vcf.gz job_out.128.d/out/tmp_128.vcf.gz job_out.129.d/out/tmp_129.vcf.gz job_out.130.d/out/tmp_130.vcf.gz job_out.131.d/out/tmp_131.vcf.gz job_out.132.d/out/tmp_132.vcf.gz; Date=Wed Sep 18 14:03:41 2019
140140
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00102-N1-DNA1-WES1
141-
17 41215920 rs28897696 G T 125 . AC=2;AF=1;AN=2;DP=5;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.81;QD=31.25;SOR=3.258;gnomad_exomes_an=0;gnomad_exomes_hom=0;gnomad_exomes_het=0;gnomad_genomes_an=251312;gnomad_genomes_hom=0;gnomad_genomes_het=5;clinvar_vcv=VCV000055407.47;clinvar_germline_classification=Pathogenic;ANN=T|missense_variant|MODERATE|BRCA1|HGNC:1100|transcript|NM_007294.4|Coding&ManeSelect|17/23|g.41215920G>T|c.5123C>A|p.Ala1708Glu|5236/7088|5123/5592|1708/1864|0|-1| GT:AD:DP:GQ:PL 1/1:0,4:4:12:162,12,0
141+
17 41215920 rs28897696 G T 125 . AC=2;AF=1;AN=2;DP=5;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.81;QD=31.25;SOR=3.258;gnomad_exomes_an=0;gnomad_exomes_hom=0;gnomad_exomes_het=0;gnomad_genomes_an=251312;gnomad_genomes_hom=0;gnomad_genomes_het=5;clinvar_vcv=VCV000055407.47;clinvar_germline_classification=Pathogenic;ANN=T|missense_variant|MODERATE|BRCA1|HGNC%3A1100|transcript|NM_007294.4|Coding&ManeSelect|17/23|g.41215920G>T|c.5123C>A|p.Ala1708Glu|5236/7088|5123/5592|1708/1864|0|-1| GT:AD:DP:GQ:PL 1/1:0,4:4:12:162,12,0

src/annotate/strucvars/mod.rs

Lines changed: 34 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -557,7 +557,7 @@ pub struct GenotypeInfo {
557557
pub amq: Option<i32>,
558558
/// Copy number.
559559
#[serde(default, skip_serializing_if = "Option::is_none")]
560-
pub cn: Option<i32>,
560+
pub cn: Option<f32>,
561561
/// Average normalized coverage.
562562
#[serde(default, skip_serializing_if = "Option::is_none")]
563563
pub anc: Option<f32>,
@@ -887,7 +887,7 @@ impl AsyncAnnotatedVariantWriter for VarFishStrucvarTsvWriter {
887887
entry.src = Some(*src);
888888
}
889889
// amq
890-
("CN", Some(sample::Value::Integer(cn))) => {
890+
("CN", Some(sample::Value::Float(cn))) => {
891891
entry.cn = Some(*cn);
892892
}
893893
// anc
@@ -1403,7 +1403,7 @@ impl TryInto<VcfRecord> for VarFishStrucvarTsvRecord {
14031403
.amq
14041404
.as_ref()
14051405
.map(|amq| sample::Value::Integer(*amq)),
1406-
genotype.cn.as_ref().map(|cn| sample::Value::Integer(*cn)),
1406+
genotype.cn.as_ref().map(|cn| sample::Value::Float(*cn)),
14071407
genotype.anc.as_ref().map(|anc| sample::Value::Float(*anc)),
14081408
genotype.pc.as_ref().map(|pc| sample::Value::Integer(*pc)),
14091409
]);
@@ -2102,9 +2102,12 @@ mod conv {
21022102
process_gt(&mut entries, sample_no, &gt, pedigree, tsv_record);
21032103
}
21042104
// Obtain `GenotypeInfo::cn` from `FORMAT/CN`.
2105-
("CN", Some(sample::Value::Integer(cn))) => {
2105+
("CN", Some(sample::Value::Float(cn))) => {
21062106
entries[sample_no].cn = Some(*cn);
21072107
}
2108+
("CN", Some(sample::Value::Integer(cn))) => {
2109+
entries[sample_no].cn = Some(*cn as f32);
2110+
}
21082111
// Obtain `GenotypeInfo::gq` from `FORMAT/GQ`.
21092112
("GQ", Some(sample::Value::Integer(gq))) => {
21102113
entries[sample_no].gq = Some(*gq);
@@ -2287,9 +2290,12 @@ mod conv {
22872290
entries[sample_no].pev = Some(*pe);
22882291
}
22892292
// Obtain `GenotypeInfo::cn` from `FORMAT/CN`.
2290-
("CN", Some(sample::Value::Integer(cn))) => {
2293+
("CN", Some(sample::Value::Float(cn))) => {
22912294
entries[sample_no].cn = Some(*cn);
22922295
}
2296+
("CN", Some(sample::Value::Integer(cn))) => {
2297+
entries[sample_no].cn = Some(*cn as f32);
2298+
}
22932299
// Obtain `GenotypeInfo::pc` from `FORMAT/BC`.
22942300
("BC", Some(sample::Value::Integer(bc))) => {
22952301
entries[sample_no].pc = Some(*bc);
@@ -2402,9 +2408,12 @@ mod conv {
24022408
process_gt(&mut entries, sample_no, &gt, pedigree, tsv_record);
24032409
}
24042410
// Obtain `GenotypeInfo::cn` from `FORMAT/CN`.
2405-
("CN", Some(sample::Value::Integer(cn))) => {
2411+
("CN", Some(sample::Value::Float(cn))) => {
24062412
entries[sample_no].cn = Some(*cn);
24072413
}
2414+
("CN", Some(sample::Value::Integer(cn))) => {
2415+
entries[sample_no].cn = Some(*cn as f32);
2416+
}
24082417
// Obtain `GenotypeInfo::pc` from `FORMAT/NP`.
24092418
("NP", Some(sample::Value::Integer(np))) => {
24102419
entries[sample_no].pc = Some(*np);
@@ -2780,15 +2789,24 @@ mod conv {
27802789
.unwrap_or_else(|| panic!("sample must be in pedigree: {:?}", &entry.name))
27812790
.sex;
27822791
let expected_cn = match (sex, is_chr_x, is_chr_y) {
2783-
(_, false, false) => Some(2),
2784-
(Sex::Male, false, true) | (Sex::Male, true, false) => Some(1),
2785-
(Sex::Female, true, false) => Some(2),
2792+
(_, false, false) => Some(2.),
2793+
(Sex::Male, false, true) | (Sex::Male, true, false) => Some(1.),
2794+
(Sex::Female, true, false) => Some(2.),
27862795
// do not count, sex missing or conflicting chromosome
27872796
_ => None,
27882797
};
27892798
if let (Some(cn), Some(expected_cn)) = (entry.cn, expected_cn) {
27902799
let delta = (cn - expected_cn).abs();
2791-
if expected_cn == 1 {
2800+
if delta.round() != delta {
2801+
tracing::warn!(
2802+
"Fractional CN value: {} and expected {} for sample {}, rounding.",
2803+
cn,
2804+
expected_cn,
2805+
entry.name,
2806+
);
2807+
}
2808+
let delta = delta.round() as i32;
2809+
if expected_cn == 1. {
27922810
tsv_record.num_hemi_alt += delta;
27932811
} else if delta == 0 {
27942812
tsv_record.num_hom_ref += 1;
@@ -3988,7 +4006,7 @@ mod test {
39884006
src: Some(143),
39894007
srv: Some(43),
39904008
amq: Some(99),
3991-
cn: Some(1),
4009+
cn: Some(1.),
39924010
anc: Some(0.5),
39934011
pc: Some(10),
39944012
},
@@ -4002,7 +4020,7 @@ mod test {
40024020
src: Some(44),
40034021
srv: Some(0),
40044022
amq: Some(98),
4005-
cn: Some(2),
4023+
cn: Some(2.),
40064024
anc: Some(1.0),
40074025
pc: Some(10),
40084026
},
@@ -4016,7 +4034,7 @@ mod test {
40164034
src: Some(33),
40174035
srv: Some(0),
40184036
amq: Some(97),
4019-
cn: Some(2),
4037+
cn: Some(2.),
40204038
anc: Some(1.0),
40214039
pc: Some(10),
40224040
},
@@ -4064,7 +4082,7 @@ mod test {
40644082
src: Some(143),
40654083
srv: Some(43),
40664084
amq: Some(99),
4067-
cn: Some(1),
4085+
cn: Some(1.),
40684086
anc: Some(0.5),
40694087
pc: Some(10),
40704088
},
@@ -4078,7 +4096,7 @@ mod test {
40784096
src: Some(143),
40794097
srv: Some(43),
40804098
amq: Some(99),
4081-
cn: Some(1),
4099+
cn: Some(1.),
40824100
anc: Some(0.5),
40834101
pc: Some(10),
40844102
},
@@ -4092,7 +4110,7 @@ mod test {
40924110
src: Some(143),
40934111
srv: Some(43),
40944112
amq: Some(99),
4095-
cn: Some(1),
4113+
cn: Some(1.),
40964114
anc: Some(0.5),
40974115
pc: Some(10),
40984116
},

0 commit comments

Comments
 (0)