@@ -12,7 +12,7 @@ use std::time::Instant;
12
12
use self :: ann:: { AnnField , FeatureBiotype } ;
13
13
use crate :: annotate:: cli:: { Sources , TranscriptSettings } ;
14
14
use crate :: annotate:: genotype_string;
15
- use crate :: annotate:: seqvars:: ann:: FeatureTag ;
15
+ use crate :: annotate:: seqvars:: ann:: { FeatureTag , FeatureType } ;
16
16
use crate :: annotate:: seqvars:: csq:: {
17
17
ConfigBuilder as ConsequencePredictorConfigBuilder , ConsequencePredictor , VcfVariant ,
18
18
} ;
@@ -977,6 +977,13 @@ impl VarFishSeqvarTsvWriter {
977
977
anns. sort_by_key ( |ann| ann. consequences [ 0 ] ) ;
978
978
}
979
979
980
+ let empty_hgnc_record = HgncRecord {
981
+ hgnc_id : "" . to_string ( ) ,
982
+ ensembl_gene_id : "" . to_string ( ) ,
983
+ entrez_id : "" . to_string ( ) ,
984
+ gene_symbol : "" . to_string ( ) ,
985
+ } ;
986
+
980
987
// For each gene in `anns_by_gene`, assign only the `refseq_*` and `ensembl_*` values into
981
988
// `tsv_record` and write out the record. We clear the record before each iteration
982
989
// so data does not leak to other genes. We use `self.hgnc_map` to map from the gene
@@ -988,7 +995,34 @@ impl VarFishSeqvarTsvWriter {
988
995
// xlink table is not on the same version as the cdot data.
989
996
let hgnc_record = match self . hgnc_map . as_ref ( ) . unwrap ( ) . get ( hgnc_id) {
990
997
Some ( hgnc_record) => hgnc_record,
991
- None => continue ,
998
+ None => {
999
+ if hgnc_id. is_empty ( ) {
1000
+ // If the HGNC ID is empty, this is likely an intergenic variant.
1001
+ // We verify this explicitly (every annotation must be intergenic) and, if so, fall back to the empty placeholder HGNC record.
1002
+ if anns. iter ( ) . all ( |a| {
1003
+ a. feature_type
1004
+ == FeatureType :: Custom {
1005
+ value : "Intergenic" . into ( ) ,
1006
+ }
1007
+ } ) {
1008
+ & empty_hgnc_record
1009
+ } else {
1010
+ tracing:: warn!(
1011
+ "Empty HGNC id for {}:{}-{}, skipping." ,
1012
+ tsv_record. chromosome,
1013
+ tsv_record. start,
1014
+ tsv_record. end
1015
+ ) ;
1016
+ continue ;
1017
+ }
1018
+ } else {
1019
+ tracing:: warn!(
1020
+ "HGNC record for {} not found in HGNC map, skipping gene" ,
1021
+ hgnc_id
1022
+ ) ;
1023
+ continue ;
1024
+ }
1025
+ }
992
1026
} ;
993
1027
994
1028
tsv_record. clear_refseq_ensembl ( ) ;
0 commit comments