Skip to content

Commit 3be028d

Browse files
authored
fix: Use noodles' VCF FileFormat::default() (#778)
1 parent d1b5eac commit 3be028d

File tree

3 files changed

+49
-25
lines changed

3 files changed

+49
-25
lines changed

src/annotate/seqvars/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ fn build_header(
151151
additional_records: &[(String, String)],
152152
) -> VcfHeader {
153153
let mut header_out = header_in.clone();
154+
*header_out.file_format_mut() = FileFormat::default();
154155

155156
if with_frequencies {
156157
header_out.infos_mut().insert(
@@ -662,6 +663,7 @@ impl VarFishSeqvarTsvWriter {
662663
.header
663664
.as_ref()
664665
.expect("VCF header must be set/written");
666+
let file_format_gt = FileFormat::new(4, 3);
665667
let mut gt_calls = GenotypeCalls::default();
666668
let samples = record.samples();
667669
let sample_names = hdr.sample_names().iter();
@@ -673,7 +675,7 @@ impl VarFishSeqvarTsvWriter {
673675
genotypes.as_ref().and_then(|gt| {
674676
gt.get(sample_idx).map(|value| match value {
675677
Some(Value::String(s)) => s.to_owned(),
676-
Some(Value::Genotype(gt)) => genotype_string(gt, FileFormat::new(4, 3)),
678+
Some(Value::Genotype(gt)) => genotype_string(gt, file_format_gt),
677679
_ => ".".into(),
678680
})
679681
})

src/annotate/seqvars/snapshots/mehari__annotate__seqvars__test__smoke_test_output_vcf.snap

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
source: src/annotate/seqvars/mod.rs
33
expression: actual
44
---
5-
##fileformat=VCFv4.2
5+
##fileformat=VCFv4.5
66
##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
77
##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
88
##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
@@ -138,4 +138,4 @@ expression: actual
138138
##bcftools_concatVersion=1.9+htslib-1.9
139139
##bcftools_concatCommand=concat --allow-overlaps -d none -o output/out.vcf.gz -O z job_out.0.d/out/tmp_0.vcf.gz job_out.1.d/out/tmp_1.vcf.gz job_out.2.d/out/tmp_2.vcf.gz job_out.3.d/out/tmp_3.vcf.gz job_out.4.d/out/tmp_4.vcf.gz job_out.5.d/out/tmp_5.vcf.gz job_out.6.d/out/tmp_6.vcf.gz job_out.7.d/out/tmp_7.vcf.gz job_out.8.d/out/tmp_8.vcf.gz job_out.9.d/out/tmp_9.vcf.gz job_out.10.d/out/tmp_10.vcf.gz job_out.11.d/out/tmp_11.vcf.gz job_out.12.d/out/tmp_12.vcf.gz job_out.13.d/out/tmp_13.vcf.gz job_out.14.d/out/tmp_14.vcf.gz job_out.15.d/out/tmp_15.vcf.gz job_out.16.d/out/tmp_16.vcf.gz job_out.17.d/out/tmp_17.vcf.gz job_out.18.d/out/tmp_18.vcf.gz job_out.19.d/out/tmp_19.vcf.gz job_out.20.d/out/tmp_20.vcf.gz job_out.21.d/out/tmp_21.vcf.gz job_out.22.d/out/tmp_22.vcf.gz job_out.23.d/out/tmp_23.vcf.gz job_out.24.d/out/tmp_24.vcf.gz job_out.25.d/out/tmp_25.vcf.gz job_out.26.d/out/tmp_26.vcf.gz job_out.27.d/out/tmp_27.vcf.gz job_out.28.d/out/tmp_28.vcf.gz job_out.29.d/out/tmp_29.vcf.gz job_out.30.d/out/tmp_30.vcf.gz job_out.31.d/out/tmp_31.vcf.gz job_out.32.d/out/tmp_32.vcf.gz job_out.33.d/out/tmp_33.vcf.gz job_out.34.d/out/tmp_34.vcf.gz job_out.35.d/out/tmp_35.vcf.gz job_out.36.d/out/tmp_36.vcf.gz job_out.37.d/out/tmp_37.vcf.gz job_out.38.d/out/tmp_38.vcf.gz job_out.39.d/out/tmp_39.vcf.gz job_out.40.d/out/tmp_40.vcf.gz job_out.41.d/out/tmp_41.vcf.gz job_out.42.d/out/tmp_42.vcf.gz job_out.43.d/out/tmp_43.vcf.gz job_out.44.d/out/tmp_44.vcf.gz job_out.45.d/out/tmp_45.vcf.gz job_out.46.d/out/tmp_46.vcf.gz job_out.47.d/out/tmp_47.vcf.gz job_out.48.d/out/tmp_48.vcf.gz job_out.49.d/out/tmp_49.vcf.gz job_out.50.d/out/tmp_50.vcf.gz job_out.51.d/out/tmp_51.vcf.gz job_out.52.d/out/tmp_52.vcf.gz job_out.53.d/out/tmp_53.vcf.gz job_out.54.d/out/tmp_54.vcf.gz job_out.55.d/out/tmp_55.vcf.gz job_out.56.d/out/tmp_56.vcf.gz job_out.57.d/out/tmp_57.vcf.gz job_out.58.d/out/tmp_58.vcf.gz job_out.59.d/out/tmp_59.vcf.gz job_out.60.d/out/tmp_60.vcf.gz job_out.61.d/out/tmp_61.vcf.gz 
job_out.62.d/out/tmp_62.vcf.gz job_out.63.d/out/tmp_63.vcf.gz job_out.64.d/out/tmp_64.vcf.gz job_out.65.d/out/tmp_65.vcf.gz job_out.66.d/out/tmp_66.vcf.gz job_out.67.d/out/tmp_67.vcf.gz job_out.68.d/out/tmp_68.vcf.gz job_out.69.d/out/tmp_69.vcf.gz job_out.70.d/out/tmp_70.vcf.gz job_out.71.d/out/tmp_71.vcf.gz job_out.72.d/out/tmp_72.vcf.gz job_out.73.d/out/tmp_73.vcf.gz job_out.74.d/out/tmp_74.vcf.gz job_out.75.d/out/tmp_75.vcf.gz job_out.76.d/out/tmp_76.vcf.gz job_out.77.d/out/tmp_77.vcf.gz job_out.78.d/out/tmp_78.vcf.gz job_out.79.d/out/tmp_79.vcf.gz job_out.80.d/out/tmp_80.vcf.gz job_out.81.d/out/tmp_81.vcf.gz job_out.82.d/out/tmp_82.vcf.gz job_out.83.d/out/tmp_83.vcf.gz job_out.84.d/out/tmp_84.vcf.gz job_out.85.d/out/tmp_85.vcf.gz job_out.86.d/out/tmp_86.vcf.gz job_out.87.d/out/tmp_87.vcf.gz job_out.88.d/out/tmp_88.vcf.gz job_out.89.d/out/tmp_89.vcf.gz job_out.90.d/out/tmp_90.vcf.gz job_out.91.d/out/tmp_91.vcf.gz job_out.92.d/out/tmp_92.vcf.gz job_out.93.d/out/tmp_93.vcf.gz job_out.94.d/out/tmp_94.vcf.gz job_out.95.d/out/tmp_95.vcf.gz job_out.96.d/out/tmp_96.vcf.gz job_out.97.d/out/tmp_97.vcf.gz job_out.98.d/out/tmp_98.vcf.gz job_out.99.d/out/tmp_99.vcf.gz job_out.100.d/out/tmp_100.vcf.gz job_out.101.d/out/tmp_101.vcf.gz job_out.102.d/out/tmp_102.vcf.gz job_out.103.d/out/tmp_103.vcf.gz job_out.104.d/out/tmp_104.vcf.gz job_out.105.d/out/tmp_105.vcf.gz job_out.106.d/out/tmp_106.vcf.gz job_out.107.d/out/tmp_107.vcf.gz job_out.108.d/out/tmp_108.vcf.gz job_out.109.d/out/tmp_109.vcf.gz job_out.110.d/out/tmp_110.vcf.gz job_out.111.d/out/tmp_111.vcf.gz job_out.112.d/out/tmp_112.vcf.gz job_out.113.d/out/tmp_113.vcf.gz job_out.114.d/out/tmp_114.vcf.gz job_out.115.d/out/tmp_115.vcf.gz job_out.116.d/out/tmp_116.vcf.gz job_out.117.d/out/tmp_117.vcf.gz job_out.118.d/out/tmp_118.vcf.gz job_out.119.d/out/tmp_119.vcf.gz job_out.120.d/out/tmp_120.vcf.gz job_out.121.d/out/tmp_121.vcf.gz job_out.122.d/out/tmp_122.vcf.gz job_out.123.d/out/tmp_123.vcf.gz 
job_out.124.d/out/tmp_124.vcf.gz job_out.125.d/out/tmp_125.vcf.gz job_out.126.d/out/tmp_126.vcf.gz job_out.127.d/out/tmp_127.vcf.gz job_out.128.d/out/tmp_128.vcf.gz job_out.129.d/out/tmp_129.vcf.gz job_out.130.d/out/tmp_130.vcf.gz job_out.131.d/out/tmp_131.vcf.gz job_out.132.d/out/tmp_132.vcf.gz; Date=Wed Sep 18 14:03:41 2019
140140
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00102-N1-DNA1-WES1
141-
17 41215920 rs28897696 G T 125 . AC=2;AF=1;AN=2;DP=5;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.81;QD=31.25;SOR=3.258;gnomad_exomes_an=0;gnomad_exomes_hom=0;gnomad_exomes_het=0;gnomad_genomes_an=251312;gnomad_genomes_hom=0;gnomad_genomes_het=5;clinvar_vcv=VCV000055407.47;clinvar_germline_classification=Pathogenic;ANN=T|missense_variant|MODERATE|BRCA1|HGNC%3A1100|transcript|NM_007294.4|Coding|ManeSelect&RefSeqSelect|17/23|g.41215920G>T|c.5123C>A|p.Ala1708Glu|5236/7088|5123/5592|1708/1864|0|-1| GT:AD:DP:GQ:PL 1/1:0,4:4:12:162,12,0
141+
17 41215920 rs28897696 G T 125 . AC=2;AF=1;AN=2;DP=5;ExcessHet=3.0103;FS=0;MLEAC=2;MLEAF=1;MQ=22.81;QD=31.25;SOR=3.258;gnomad_exomes_an=0;gnomad_exomes_hom=0;gnomad_exomes_het=0;gnomad_genomes_an=251312;gnomad_genomes_hom=0;gnomad_genomes_het=5;clinvar_vcv=VCV000055407.47;clinvar_germline_classification=Pathogenic;ANN=T|missense_variant|MODERATE|BRCA1|HGNC%3A1100|transcript|NM_007294.4|Coding|ManeSelect&RefSeqSelect|17/23|g.41215920G>T|c.5123C>A|p.Ala1708Glu|5236/7088|5123/5592|1708/1864|0|-1| GT:AD:DP:GQ:PL /1/1:0,4:4:12:162,12,0

src/annotate/strucvars/mod.rs

Lines changed: 44 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -141,17 +141,17 @@ pub mod vcf_header {
141141
use crate::ped::{Disease, PedigreeByName, Sex};
142142

143143
/// Major VCF version to use.
144-
static FILE_FORMAT_MAJOR: u32 = 4;
144+
pub(crate) const FILE_FORMAT_MAJOR: u32 = 4;
145145
/// Minor VCF version to use.
146-
static FILE_FORMAT_MINOR: u32 = 3;
146+
pub(crate) const FILE_FORMAT_MINOR: u32 = 3;
147147
/// The string to write out as the source.
148-
static SOURCE: &str = "mehari";
148+
pub(crate) const SOURCE: &str = "mehari";
149149

150150
/// Construct VCF header.
151151
///
152152
/// # Arguments
153153
///
154-
/// * `assembly` - Genome assembly to use. The canonical contigs will be taken from here.
154+
/// * `assembly` - Genome assembly to use. The canonical contigs will be taken from here.
155155
/// * `pedigree` - Pedigree to use. Will write out appropriate `META`, `SAMPLE`, and
156156
/// `PEDIGREE` header lines.
157157
/// * `date` - Date to use for the `fileDate` header line.
@@ -514,6 +514,8 @@ pub mod vcf_header {
514514
}
515515
}
516516

517+
use crate::annotate::strucvars::vcf_header::{FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR};
518+
517519
/// Writing of structural variants to VarFish TSV files.
518520
struct VarFishStrucvarTsvWriter {
519521
/// The actual (compressed) text output writer.
@@ -864,8 +866,10 @@ impl AsyncAnnotatedVariantWriter for VarFishStrucvarTsvWriter {
864866
entry.gt = Some(gt.clone());
865867
}
866868
("GT", Some(sample::Value::Genotype(gt))) => {
867-
// FIXME get file format version from header
868-
let gt = genotype_string(gt, FileFormat::new(4, 0));
869+
let gt = genotype_string(
870+
gt,
871+
FileFormat::new(FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR),
872+
);
869873
entry.gt = Some(gt);
870874
}
871875
("FT", Some(sample::Value::String(ft))) => {
@@ -2018,6 +2022,8 @@ mod conv {
20182022
use super::VarFishStrucvarTsvRecord;
20192023
use super::VcfRecordConverter;
20202024

2025+
use super::vcf_header::{FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR};
2026+
20212027
/// Helper function that extracts the CIPOS and CIEND fields from `vcf_record` into `tsv_record`.
20222028
pub fn extract_standard_cis(
20232029
vcf_record: &VcfRecord,
@@ -2097,8 +2103,10 @@ mod conv {
20972103
process_gt(&mut entries, sample_no, gt, pedigree, tsv_record);
20982104
}
20992105
("GT", Some(sample::Value::Genotype(gt))) => {
2100-
// FIXME get file format version from header
2101-
let gt = genotype_string(gt, FileFormat::new(4, 0));
2106+
let gt = genotype_string(
2107+
gt,
2108+
FileFormat::new(FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR),
2109+
);
21022110
process_gt(&mut entries, sample_no, &gt, pedigree, tsv_record);
21032111
}
21042112
// Obtain `GenotypeInfo::cn` from `FORMAT/CN`.
@@ -2183,8 +2191,10 @@ mod conv {
21832191
process_gt(&mut entries, sample_no, gt, pedigree, tsv_record);
21842192
}
21852193
("GT", Some(sample::Value::Genotype(gt))) => {
2186-
// FIXME get file format version from header
2187-
let gt = genotype_string(gt, FileFormat::new(4, 0));
2194+
let gt = genotype_string(
2195+
gt,
2196+
FileFormat::new(FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR),
2197+
);
21882198
process_gt(&mut entries, sample_no, &gt, pedigree, tsv_record);
21892199
}
21902200
// Obtain `GenotypeInfo::gq` from `FORMAT/GQ`.
@@ -2281,8 +2291,10 @@ mod conv {
22812291
process_gt(&mut entries, sample_no, gt, pedigree, tsv_record);
22822292
}
22832293
("GT", Some(sample::Value::Genotype(gt))) => {
2284-
// FIXME get file format version from header
2285-
let gt = genotype_string(gt, FileFormat::new(4, 0));
2294+
let gt = genotype_string(
2295+
gt,
2296+
FileFormat::new(FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR),
2297+
);
22862298
process_gt(&mut entries, sample_no, &gt, pedigree, tsv_record);
22872299
}
22882300
// Obtain `GenotypeInfo::pev` from `FORMAT/PE`; no pec is computed.
@@ -2403,8 +2415,10 @@ mod conv {
24032415
process_gt(&mut entries, sample_no, gt, pedigree, tsv_record);
24042416
}
24052417
("GT", Some(sample::Value::Genotype(gt))) => {
2406-
// FIXME get file format version from header
2407-
let gt = genotype_string(gt, FileFormat::new(4, 0));
2418+
let gt = genotype_string(
2419+
gt,
2420+
FileFormat::new(FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR),
2421+
);
24082422
process_gt(&mut entries, sample_no, &gt, pedigree, tsv_record);
24092423
}
24102424
// Obtain `GenotypeInfo::cn` from `FORMAT/CN`.
@@ -2471,8 +2485,10 @@ mod conv {
24712485
process_gt(&mut entries, sample_no, gt, pedigree, tsv_record);
24722486
}
24732487
("GT", Some(sample::Value::Genotype(gt))) => {
2474-
// FIXME get file format version from header
2475-
let gt = genotype_string(gt, FileFormat::new(4, 0));
2488+
let gt = genotype_string(
2489+
gt,
2490+
FileFormat::new(FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR),
2491+
);
24762492
process_gt(&mut entries, sample_no, &gt, pedigree, tsv_record);
24772493
}
24782494
// Obtain `GenotypeInfo::gq` from `FORMAT/GQ`.
@@ -2572,8 +2588,10 @@ mod conv {
25722588
process_gt(&mut entries, sample_no, gt, pedigree, tsv_record);
25732589
}
25742590
("GT", Some(sample::Value::Genotype(gt))) => {
2575-
// FIXME get file format version from header
2576-
let gt = genotype_string(gt, FileFormat::new(4, 0));
2591+
let gt = genotype_string(
2592+
gt,
2593+
FileFormat::new(FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR),
2594+
);
25772595
process_gt(&mut entries, sample_no, &gt, pedigree, tsv_record);
25782596
}
25792597
// Obtain `GenotypeInfo::gq` from `FORMAT/GL`.
@@ -2678,8 +2696,10 @@ mod conv {
26782696
process_gt(&mut entries, sample_no, gt, pedigree, tsv_record);
26792697
}
26802698
("GT", Some(sample::Value::Genotype(gt))) => {
2681-
// FIXME get file format version from header
2682-
let gt = genotype_string(gt, FileFormat::new(4, 0));
2699+
let gt = genotype_string(
2700+
gt,
2701+
FileFormat::new(FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR),
2702+
);
26832703
process_gt(&mut entries, sample_no, &gt, pedigree, tsv_record);
26842704
}
26852705
// Obtain `GenotypeInfo::gq` from `FORMAT/GQ`.
@@ -2876,8 +2896,10 @@ mod conv {
28762896
process_gt(&mut entries, sample_no, gt, pedigree, tsv_record);
28772897
}
28782898
("GT", Some(sample::Value::Genotype(gt))) => {
2879-
// FIXME get file format version from header
2880-
let gt = genotype_string(gt, FileFormat::new(4, 0));
2899+
let gt = genotype_string(
2900+
gt,
2901+
FileFormat::new(FILE_FORMAT_MAJOR, FILE_FORMAT_MINOR),
2902+
);
28812903
process_gt(&mut entries, sample_no, &gt, pedigree, tsv_record);
28822904
}
28832905
// Obtain `GenotypeInfo::gq` from `FORMAT/GQ`.

0 commit comments

Comments
 (0)