Skip to content

Commit f44a1e5

Browse files
authored
fix: TSV output + --keep-intergenic (#785)
1 parent 7c7941e commit f44a1e5

File tree

2 files changed

+40
-2
lines changed

2 files changed

+40
-2
lines changed

src/annotate/seqvars/csq.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -390,6 +390,10 @@ impl ConsequencePredictor {
390390
== TranscriptBiotype::Coding
391391
&& tx.start_codon.is_none()
392392
{
393+
tracing::debug!(
394+
"Skipping transcript {} because it is coding but has no CDS",
395+
&tx_record.tx_ac
396+
);
393397
return Ok(None);
394398
}
395399
tx

src/annotate/seqvars/mod.rs

Lines changed: 36 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ use std::time::Instant;
1212
use self::ann::{AnnField, FeatureBiotype};
1313
use crate::annotate::cli::{Sources, TranscriptSettings};
1414
use crate::annotate::genotype_string;
15-
use crate::annotate::seqvars::ann::FeatureTag;
15+
use crate::annotate::seqvars::ann::{FeatureTag, FeatureType};
1616
use crate::annotate::seqvars::csq::{
1717
ConfigBuilder as ConsequencePredictorConfigBuilder, ConsequencePredictor, VcfVariant,
1818
};
@@ -977,6 +977,13 @@ impl VarFishSeqvarTsvWriter {
977977
anns.sort_by_key(|ann| ann.consequences[0]);
978978
}
979979

980+
let empty_hgnc_record = HgncRecord {
981+
hgnc_id: "".to_string(),
982+
ensembl_gene_id: "".to_string(),
983+
entrez_id: "".to_string(),
984+
gene_symbol: "".to_string(),
985+
};
986+
980987
// For each gene in `anns_by_gene`, assign only the `refseq_*` and `ensembl_*` values into
981988
// `tsv_record` and write out the record. We clear the record before each iteration
982989
// so data does not leak to other genes. We use `self.hgnc_map` to map from the gene
@@ -988,7 +995,34 @@ impl VarFishSeqvarTsvWriter {
988995
// xlink table is not on the same version as the cdot data.
989996
let hgnc_record = match self.hgnc_map.as_ref().unwrap().get(hgnc_id) {
990997
Some(hgnc_record) => hgnc_record,
991-
None => continue,
998+
None => {
999+
if hgnc_id.is_empty() {
1000+
// If the HGNC ID is empty, this is likely an intergenic variant.
1001+
// We will check for this explicitly and construct a bogus hgnc record.
1002+
if anns.iter().all(|a| {
1003+
a.feature_type
1004+
== FeatureType::Custom {
1005+
value: "Intergenic".into(),
1006+
}
1007+
}) {
1008+
&empty_hgnc_record
1009+
} else {
1010+
tracing::warn!(
1011+
"Empty HGNC id for {}:{}-{}, skipping.",
1012+
tsv_record.chromosome,
1013+
tsv_record.start,
1014+
tsv_record.end
1015+
);
1016+
continue;
1017+
}
1018+
} else {
1019+
tracing::warn!(
1020+
"HGNC record for {} not found in HGNC map, skipping gene",
1021+
hgnc_id
1022+
);
1023+
continue;
1024+
}
1025+
}
9921026
};
9931027

9941028
tsv_record.clear_refseq_ensembl();

0 commit comments

Comments
 (0)