Skip to content

Commit 8247a97

Browse files
authored
resolve rapid species home (#10)
1 parent 5e4c3ba commit 8247a97

File tree

7 files changed

+315
-3
lines changed

7 files changed

+315
-3
lines changed

app/api/resources/rapid_view.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
from urllib.parse import parse_qs
2+
from fastapi import APIRouter, Request, HTTPException, Query
3+
from fastapi.responses import RedirectResponse
4+
import logging
5+
from core.logging import InterceptHandler
6+
from core.config import ENSEMBL_URL
7+
from api.utils.metadata import get_genome_id_from_assembly_accession_id
8+
from api.utils.rapid import construct_url, format_assembly_accession
9+
10+
# Replace the root logger's handler list with a single InterceptHandler, so
# records emitted through the stdlib `logging` module are handled by it.
# NOTE(review): InterceptHandler presumably forwards records to loguru --
# confirm against core.logging.
logging.getLogger().handlers = [InterceptHandler()]
11+
12+
# Router on which this module's Rapid redirect endpoints are registered.
router = APIRouter()
13+
14+
15+
# Resolve rapid urls
16+
@router.get("/{species_url_name}/", name="Rapid Species Resources")
@router.get("/{species_url_name}/{subpath:path}", name="Rapid Species Resources")
async def resolve_species(
    request: Request, species_url_name: str, subpath: str = "", r: str = Query(None)
) -> RedirectResponse:
    """Redirect a legacy Rapid species URL to the matching page on the new site.

    Args:
        request: Incoming request; its raw query string is re-parsed here.
        species_url_name: Rapid species path segment that embeds the assembly
            accession, e.g. ``Human_GCA_123.1``.
        subpath: Remainder of the legacy path (e.g. ``Location/View``); empty
            for the species home page.
        r: Declared so the parameter shows up in the generated API docs; the
            value actually used is read from the raw query string.

    Returns:
        A redirect to the URL built by ``construct_url``.

    Raises:
        HTTPException: 422 when no accession can be derived from
            ``species_url_name``, 404 when the metadata lookup returns no
            genome, 500 for any other failure.
    """
    try:
        # Kept inside the try block: resolving a RefSeq accession may call out
        # to NCBI and fail, which should surface as a controlled 500.
        assembly_accession_id = format_assembly_accession(species_url_name)
        if assembly_accession_id is None:
            raise HTTPException(
                status_code=422, detail="Unable to process input accession ID"
            )

        genome_object = get_genome_id_from_assembly_accession_id(assembly_accession_id)
        if not genome_object:
            raise HTTPException(status_code=404, detail="Genome not found")

        # Prefer the human-readable tag; fall back to the UUID when the tag is
        # empty or absent.
        genome_id = genome_object.get("genome_tag") or genome_object["genome_uuid"]

        # Legacy Ensembl URLs separate parameters with ';' whereas standard
        # clients use '&'. Normalise to ';' so both forms are understood.
        query_string = request.scope["query_string"].decode().replace("&", ";")
        query_params = parse_qs(query_string, separator=";")

        url = construct_url(genome_id, subpath, query_params)
        return RedirectResponse(url)

    except HTTPException as e:
        # Deliberate 4xx responses raised above: keep their status AND detail
        # instead of masking them with a generic message.
        logging.debug(e)
        raise

    except Exception as e:
        logging.exception(f"Unexpected error occurred: {e}")
        raise HTTPException(
            status_code=500, detail="Unexpected error occurred"
        ) from e
51+
52+
53+
@router.get("/", name="Rapid Home")
async def resolve_home(request: Request):
    """Send visitors of the Rapid home page to the configured Ensembl site root."""
    home_redirect = RedirectResponse(url=ENSEMBL_URL)
    return home_redirect

app/api/resources/routes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717

1818
from fastapi import APIRouter
1919

20-
from api.resources import resolver_view
20+
from api.resources import resolver_view, rapid_view
2121

2222
router = APIRouter()
2323

2424
router.include_router(resolver_view.router, tags=["resolver"], prefix="/id")
25+
router.include_router(rapid_view.router, tags=["rapid"], prefix="/rapid")

app/api/utils/metadata.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,26 @@ def get_metadata(matches: List[SearchMatch] = []):
2323
)
2424
except requests.exceptions.HTTPError as HTTPError:
2525
logger.error(f"HTTPError: {HTTPError}")
26-
return None
26+
raise HTTPError
2727
except Exception as e:
2828
logger.exception(e)
29-
return None
29+
raise e
3030

3131
return metadata_results
32+
33+
34+
def get_genome_id_from_assembly_accession_id(accession_id: str, timeout: float = 10.0):
    """Look up the genome identifiers for an assembly accession.

    Queries ``{ENSEMBL_URL}/api/metadata/genomeid`` and returns the decoded
    JSON body unchanged.

    Args:
        accession_id: Assembly accession to look up, e.g. ``GCA_000001405.29``.
        timeout: Seconds to wait for the metadata API before giving up, so a
            stalled upstream cannot hang the caller indefinitely.

    Returns:
        The parsed JSON response.

    Raises:
        requests.exceptions.HTTPError: The API answered with an error status.
        Exception: Any other failure (connection, timeout, invalid JSON) is
            logged and re-raised unchanged.
    """
    metadata_api_url = f"{ENSEMBL_URL}/api/metadata/genomeid"
    try:
        # The context managers close both the response and the session, so
        # pooled connections are released even when an error is raised.
        with requests.Session() as session:
            with session.get(
                url=metadata_api_url,
                # Passed via `params` so the value is URL-encoded and cannot
                # inject additional query parameters.
                params={"assembly_accession_id": accession_id},
                timeout=timeout,
            ) as response:
                response.raise_for_status()
                return response.json()
    except requests.exceptions.HTTPError as http_error:
        logger.error(f"HTTPError: {http_error}")
        raise
    except Exception as e:
        logger.exception(e)
        raise

app/api/utils/rapid.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
from loguru import logger
2+
import requests
3+
from core.config import ENSEMBL_URL, NCBI_DATASETS_URL
4+
import re
5+
6+
7+
def get_assembly_accession_from_ncbi(accession_id: str, timeout: float = 10.0):
    """Fetch the first NCBI Datasets genome report for an assembly accession.

    Args:
        accession_id: Assembly accession, e.g. ``GCA_000001405.29``.
        timeout: Seconds to wait for NCBI before giving up.

    Returns:
        The first entry of the response's ``reports`` list, or ``None`` when
        the response contains no reports.

    Raises:
        requests.exceptions.HTTPError: NCBI answered with an error status.
        Exception: Any other failure (connection, timeout, invalid JSON) is
            logged and re-raised unchanged.
    """
    from urllib.parse import quote

    # The accession originates from the request path; encode it so it cannot
    # alter the structure of the upstream URL.
    ncbi_dataset_api_url = (
        f"{NCBI_DATASETS_URL}/genome/accession/"
        f"{quote(accession_id, safe='')}/dataset_report"
    )

    try:
        # Context managers release the response and the session on every path.
        with requests.Session() as session:
            with session.get(url=ncbi_dataset_api_url, timeout=timeout) as response:
                response.raise_for_status()
                response_json = response.json()

        # A payload without a "reports" key means "nothing found", not an error.
        reports = (
            response_json.get("reports") if isinstance(response_json, dict) else None
        )
        return reports[0] if reports else None

    except requests.exceptions.HTTPError as http_error:
        logger.error(f"HTTPError: {http_error}")
        raise
    except Exception as e:
        logger.exception(e)
        raise
28+
29+
30+
def format_assembly_accession(species_url_name: str):
    """Extract the assembly accession embedded in a Rapid species URL name.

    The accession is whatever follows the last ``_GCA_`` / ``_GCF_`` marker,
    prefixed with the marker that was actually matched, e.g.
    ``Human_GCA_123.1`` -> ``GCA_123.1``.

    Names ending in ``rs`` denote RefSeq annotations: the suffix is stripped
    and the accession is replaced by the ``paired_accession`` reported by
    NCBI, because the RefSeq accession version may differ.

    Args:
        species_url_name: Species segment of a Rapid URL.

    Returns:
        The assembly accession, or ``None`` when the name contains no marker,
        nothing follows the marker, or NCBI reports no paired accession.

    Raises:
        Exception: Whatever ``get_assembly_accession_from_ncbi`` raises for a
            RefSeq name is propagated unchanged.
    """
    markers = list(re.finditer(r"_(GCA|GCF)_", species_url_name))
    if not markers:
        return None

    # Use the last marker: the accession is the trailing part of the name, and
    # the species part may itself contain something that looks like a marker.
    marker = markers[-1]
    accession_id = species_url_name[marker.end():]
    if not accession_id:
        return None

    # Take the prefix from the matched marker rather than searching the whole
    # name, so "GCF" appearing in the species part cannot flip a GCA accession.
    assembly_accession_id = f"{marker.group(1)}_{accession_id}"

    if not assembly_accession_id.endswith("rs"):
        return assembly_accession_id

    # RefSeqs have GCF prefix but version could be different. So fetch it from ncbi
    trimmed_assembly_accession_id = assembly_accession_id[: -len("rs")]
    ncbi_dataset_report = get_assembly_accession_from_ncbi(
        trimmed_assembly_accession_id
    )
    paired_accession = (
        ncbi_dataset_report.get("paired_accession") if ncbi_dataset_report else None
    )
    if not paired_accession:
        logger.error(
            f"No paired accession found at NCBI for {trimmed_assembly_accession_id}"
        )
        return None
    return paired_accession
55+
56+
57+
def construct_url(genome_id, subpath, query_params):
    """Map a legacy Rapid page to the equivalent URL on the new Ensembl site.

    Args:
        genome_id: Genome tag or UUID used in the new site's URLs.
        subpath: Legacy path after the species name (e.g. ``Location/View``).
        query_params: Mapping of parameter name to list of values, as produced
            by ``urllib.parse.parse_qs``. Only ``r`` (region) and ``g`` (gene)
            are read.

    Returns:
        - Species page for an empty subpath or ``Info/Index``.
        - Genome browser for ``Location`` pages.
        - Entity viewer for ``Gene`` / ``Transcript`` pages.
        - ``ENSEMBL_URL`` for anything else.

        When the needed ``r`` / ``g`` value is absent, the app URL for the
        genome is returned without a focus, rather than a URL containing the
        literal text ``None``.
    """
    # `or [None]` also covers an explicitly empty value list.
    location = (query_params.get("r") or [None])[0]
    gene_id = (query_params.get("g") or [None])[0]

    if subpath == "" or re.search("Info/Index", subpath, re.IGNORECASE):
        return f"{ENSEMBL_URL}/species/{genome_id}"

    if re.search("Location", subpath):
        genome_browser_url = f"{ENSEMBL_URL}/genome-browser/{genome_id}"
        if not location:
            return genome_browser_url
        return f"{genome_browser_url}?focus=location:{location}"

    # Gene is checked before Transcript, matching the original precedence.
    is_gene_page = bool(re.search("Gene", subpath))
    if is_gene_page or re.search("Transcript", subpath):
        entity_viewer_url = f"{ENSEMBL_URL}/entity-viewer/{genome_id}"
        if not gene_id:
            return entity_viewer_url

        gene_url = f"{entity_viewer_url}/gene:{gene_id}"
        if is_gene_page:
            if re.search("Gene/Compara_Homolog", subpath):
                return f"{gene_url}?view=homology"
        elif re.search("Domains|ProteinSummary", subpath):
            return f"{gene_url}?view=protein"
        return gene_url

    return ENSEMBL_URL

app/core/config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@
3636
)
3737
DEFAULT_APP = config("DEFAULT_APP", cast=str, default="entity-viewer")
3838
ENSEMBL_URL = config("ENSEMBL_URL", cast=str, default="https://beta.ensembl.org")
39+
NCBI_DATASETS_URL = config(
40+
"NCBI_DATASETS_URL", cast=str, default="https://api.ncbi.nlm.nih.gov/datasets/v2"
41+
)
42+
3943
DEBUG: bool = config("DEBUG", cast=bool, default=False)
4044
PROJECT_NAME: str = config("PROJECT_NAME", default="Ensembl Web Resolver")
4145
ALLOWED_HOSTS: list[str] = config(

app/tests/test_rapid.py

Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
import unittest
2+
from unittest.mock import patch
3+
from fastapi.testclient import TestClient
4+
from core.config import ENSEMBL_URL
5+
from main import app
6+
7+
8+
class TestRapid(unittest.TestCase):
    """Endpoint tests for the ``/rapid`` redirect routes.

    The metadata lookup is patched at the point where the view imports it, so
    these tests never touch the network.
    """

    def setUp(self):
        self.client = TestClient(app)
        self.api_prefix = ""
        self.mock_rapid_api_url = "/rapid"
        self.species_url_name = "Human_GCA_123.1"
        self.species_url_name_refseq = "DogRefSeq_GCA_123.1rs"

        # Metadata response without a genome_tag: the UUID is used in URLs.
        self.mock_genome_id_response1 = {
            "genome_uuid": "genome_uuid1",
            "release_version": 110,
            "genome_tag": "",
        }

        # Metadata response with a genome_tag: the tag is used in URLs.
        self.mock_genome_id_response2 = {
            "genome_uuid": "genome_uuid2",
            "release_version": 110,
            "genome_tag": "xyz",
        }

        self.mock_ncbi_accession = "GCF_000001405.40"

        self.mock_resolved_url = {
            "genome1": f"{ENSEMBL_URL}/species/genome_uuid1",
            "genome2": f"{ENSEMBL_URL}/species/xyz",
        }

    # Test species home page
    @patch("api.resources.rapid_view.get_genome_id_from_assembly_accession_id")
    def test_rapid_species_home_success(
        self, mock_get_genome_id_from_assembly_accession_id
    ):
        # test web-metadata-api response without, then with, genome_tag
        cases = (
            (self.mock_genome_id_response1, self.mock_resolved_url["genome1"]),
            (self.mock_genome_id_response2, self.mock_resolved_url["genome2"]),
        )
        for genome_response, expected_url in cases:
            with self.subTest(expected_url=expected_url):
                mock_get_genome_id_from_assembly_accession_id.return_value = (
                    genome_response
                )

                response = self.client.get(
                    f"{self.mock_rapid_api_url}/{self.species_url_name}/",
                    follow_redirects=False,
                )

                self.assertEqual(response.status_code, 307)  # Temporary Redirect
                self.assertIn("location", response.headers)
                self.assertEqual(response.headers["location"], expected_url)

    # Test RefSeq species home page (accession resolved through NCBI)
    @patch("api.utils.rapid.get_assembly_accession_from_ncbi")
    @patch("api.resources.rapid_view.get_genome_id_from_assembly_accession_id")
    def test_rapid_species_refseq_success(
        self,
        mock_get_genome_id_from_assembly_accession_id,
        mock_get_assembly_accession_from_ncbi,
    ):
        mock_get_assembly_accession_from_ncbi.return_value = {
            "paired_accession": self.mock_ncbi_accession
        }
        mock_get_genome_id_from_assembly_accession_id.return_value = (
            self.mock_genome_id_response2
        )

        response = self.client.get(
            f"{self.mock_rapid_api_url}/{self.species_url_name_refseq}/",
            follow_redirects=False,
        )

        self.assertEqual(response.status_code, 307)  # Temporary Redirect
        self.assertEqual(
            response.headers["location"], self.mock_resolved_url["genome2"]
        )
        # The "rs" suffix is stripped before asking NCBI, and the paired
        # accession NCBI returns is what gets looked up in the metadata API.
        mock_get_assembly_accession_from_ncbi.assert_called_once_with("GCA_123.1")
        mock_get_genome_id_from_assembly_accession_id.assert_called_once_with(
            self.mock_ncbi_accession
        )

    # Test Region in detail page
    @patch("api.resources.rapid_view.get_genome_id_from_assembly_accession_id")
    def test_rapid_species_location_success(
        self, mock_get_genome_id_from_assembly_accession_id
    ):
        # test web-metadata-api response without genome_tag
        mock_get_genome_id_from_assembly_accession_id.return_value = (
            self.mock_genome_id_response1
        )

        response = self.client.get(
            f"{self.mock_rapid_api_url}/{self.species_url_name}/Location/View",
            params={"r": "1:1000-2000"},
            follow_redirects=False,
        )

        self.assertEqual(response.status_code, 307)  # Redirect
        self.assertIn(
            f"{ENSEMBL_URL}/genome-browser/genome_uuid1?focus=location:1:1000-2000",
            response.headers["location"],
        )

    # Test Gene pages
    @patch("api.resources.rapid_view.get_genome_id_from_assembly_accession_id")
    def test_rapid_species_gene_compara_homolog(
        self, mock_get_genome_id_from_assembly_accession_id
    ):
        mock_get_genome_id_from_assembly_accession_id.return_value = (
            self.mock_genome_id_response1
        )

        response = self.client.get(
            f"{self.mock_rapid_api_url}/{self.species_url_name}/Gene/Compara_Homolog",
            params={"g": "GENE123"},
            follow_redirects=False,
        )

        self.assertEqual(response.status_code, 307)  # Redirect
        self.assertIn(
            f"{ENSEMBL_URL}/entity-viewer/genome_uuid1/gene:GENE123?view=homology",
            response.headers["location"],
        )

    # Test 404
    @patch("api.resources.rapid_view.get_genome_id_from_assembly_accession_id")
    def test_rapid_species_404_not_found(
        self, mock_get_genome_id_from_assembly_accession_id
    ):
        mock_get_genome_id_from_assembly_accession_id.return_value = {}

        response = self.client.get(
            f"{self.mock_rapid_api_url}/{self.species_url_name}/Invalid_GCA"
        )
        self.assertEqual(response.status_code, 404)

    # Test invalid url entity
    def test_rapid_species_422_unprocessable_entity(self):
        response = self.client.get(f"{self.mock_rapid_api_url}/Invalid_Name/")
        self.assertEqual(response.status_code, 422)

    # Test POST
    def test_rapid_species_post_method_not_allowed(self):
        response = self.client.post(
            f"{self.mock_rapid_api_url}/{self.species_url_name}/"
        )
        self.assertEqual(response.status_code, 405)

    # Test 500
    @patch("api.resources.rapid_view.get_genome_id_from_assembly_accession_id")
    def test_rapid_species_500_internal_server_error(
        self, mock_get_genome_id_from_assembly_accession_id
    ):
        # A side_effect takes precedence over any return_value, so only the
        # side_effect is configured.
        mock_get_genome_id_from_assembly_accession_id.side_effect = Exception(
            "Unexpected error"
        )

        response = self.client.get(
            f"{self.mock_rapid_api_url}/{self.species_url_name}/"
        )
        self.assertEqual(response.status_code, 500)
        self.assertEqual(response.json()["detail"], "Unexpected error occurred")

sample-env

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ ENSEMBL_SEARCH_HUB_API=https://beta.ensembl.org/api/search/stable-id
33
DEFAULT_APP=entity-viewer
44
ENSEMBL_URL=http://beta.ensembl.org
55
DEBUG=true
6+
NCBI_DATASETS_URL=https://api.ncbi.nlm.nih.gov/datasets/v2

0 commit comments

Comments
 (0)