Skip to content

Commit 636f004

Browse files
author
Oliver Stolpe
authored
Fix: Ensembl ID conversion must be u64 (#618)
1 parent 00ab51f commit 636f004

File tree

4 files changed

+14
-9
lines changed

4 files changed

+14
-9
lines changed

protos/varfish/v1/strucvars/bgdb.proto

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ message XlinkRecord {
6767
// Entrez gene ID.
6868
uint32 entrez_id = 3;
6969
// Ensembl gene ID.
70-
uint32 ensembl_id = 4;
70+
uint64 ensembl_id = 4;
7171
}
7272

7373
// Gene cross-link database.

src/common/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ pub fn reciprocal_overlap(lhs: Range<i32>, rhs: Range<i32>) -> f32 {
9999
}
100100
}
101101

102-
/// Helper to convert ENSEMBL and RefSeq gene ID to u32.
103-
pub fn numeric_gene_id(raw_id: &str) -> Result<u32, anyhow::Error> {
102+
/// Helper to convert ENSEMBL and RefSeq gene ID to u64.
103+
pub fn numeric_gene_id(raw_id: &str) -> Result<u64, anyhow::Error> {
104104
let clean_id = if raw_id.starts_with("ENSG") {
105105
// Strip "ENSG" prefix and as many zeroes as follow
106106
raw_id
@@ -113,7 +113,7 @@ pub fn numeric_gene_id(raw_id: &str) -> Result<u32, anyhow::Error> {
113113
};
114114

115115
clean_id
116-
.parse::<u32>()
116+
.parse::<u64>()
117117
.map_err(|e| anyhow::anyhow!("could not parse gene id {:?}: {}", &clean_id, &e))
118118
}
119119

@@ -694,7 +694,7 @@ mod test {
694694
#[rstest::rstest]
695695
#[case("ENSG0000000142", 142)]
696696
#[case("42", 42)]
697-
fn numeric_gene_id(#[case] raw_id: &str, #[case] expected: u32) {
697+
fn numeric_gene_id(#[case] raw_id: &str, #[case] expected: u64) {
698698
let actual = super::numeric_gene_id(raw_id).unwrap();
699699
assert_eq!(expected, actual);
700700
}

src/strucvars/query/genes.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use crate::{common::GenomeRelease, pbs};
1313
#[derive(Default, Debug)]
1414
pub struct XlinkDbRecord {
1515
pub entrez_id: u32,
16-
pub ensembl_gene_id: u32,
16+
pub ensembl_gene_id: u64,
1717
pub symbol: String,
1818
pub hgnc_id: String,
1919
}
@@ -26,7 +26,7 @@ pub struct XlinkDb {
2626
/// Link from entrez ID to indices in records.
2727
pub from_entrez: multimap::MultiMap<u32, u32>,
2828
/// Link from ensembl ID to indices in records.
29-
pub from_ensembl: multimap::MultiMap<u32, u32>,
29+
pub from_ensembl: multimap::MultiMap<u64, u32>,
3030
/// Link from HGNC ID to indices in records.
3131
pub from_hgnc: multimap::MultiMap<String, u32>,
3232
}

src/strucvars/query/mod.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -856,7 +856,12 @@ pub fn translate_genes(genes: &Vec<String>, dbs: &InMemoryDbs) -> HashSet<String
856856
let gene = gene.trim();
857857
if re_entrez.is_match(gene) {
858858
if let Ok(gene_id) = numeric_gene_id(gene) {
859-
if let Some(record_ids) = dbs.genes.xlink.from_ensembl.get_vec(&gene_id) {
859+
if let Some(record_ids) = dbs
860+
.genes
861+
.xlink
862+
.from_entrez
863+
.get_vec(&u32::try_from(gene_id).expect("entrez id should fit in u32"))
864+
{
860865
for record_id in record_ids {
861866
result.insert(dbs.genes.xlink.records[*record_id as usize].hgnc_id.clone());
862867
}
@@ -867,7 +872,7 @@ pub fn translate_genes(genes: &Vec<String>, dbs: &InMemoryDbs) -> HashSet<String
867872
}
868873
} else if re_ensembl.is_match(gene) {
869874
if let Ok(gene_id) = numeric_gene_id(gene) {
870-
if let Some(record_ids) = dbs.genes.xlink.from_entrez.get_vec(&gene_id) {
875+
if let Some(record_ids) = dbs.genes.xlink.from_ensembl.get_vec(&gene_id) {
871876
for record_id in record_ids {
872877
result.insert(dbs.genes.xlink.records[*record_id as usize].hgnc_id.clone());
873878
}

0 commit comments

Comments
 (0)