Skip to content

Commit 1b5cad1

Browse files
committed
Apply ruff formatting
1 parent 6b3a250 commit 1b5cad1

File tree

12 files changed

+1002
-461
lines changed

12 files changed

+1002
-461
lines changed

src/ChemInformant/api_helpers.py

Lines changed: 51 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
These functions are not intended for direct use by end-users, but are consumed
66
by the main API interface.
77
"""
8+
89
from __future__ import annotations
910

1011
import random
@@ -18,8 +19,8 @@
1819

1920
# --- Module Constants ---
2021
PUBCHEM_API_BASE = "https://pubchem.ncbi.nlm.nih.gov/rest/pug"
21-
PUG_VIEW_BASE = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data"
22-
REQUEST_TIMEOUT = 15
22+
PUG_VIEW_BASE = "https://pubchem.ncbi.nlm.nih.gov/rest/pug_view/data"
23+
REQUEST_TIMEOUT = 15
2324

2425
MAX_RETRIES, INITIAL_BACKOFF, MAX_BACKOFF = 5, 1, 16
2526
REQUEST_RATE_LIMIT = 5 # Requests per second
@@ -32,6 +33,7 @@
3233

3334
# --- Session & Caching ---
3435

36+
3537
def setup_cache(
3638
cache_name: str = "pubchem_cache",
3739
backend: str = "sqlite",
@@ -56,13 +58,18 @@ def setup_cache(
5658
"""
5759
global _session
5860
_session = requests_cache.CachedSession(
59-
cache_name = cache_name,
60-
backend = backend,
61-
expire_after = expire_after,
62-
allowable_codes= [200, 404, 503], # Cache "not found" and "server busy" responses
61+
cache_name=cache_name,
62+
backend=backend,
63+
expire_after=expire_after,
64+
allowable_codes=[
65+
200,
66+
404,
67+
503,
68+
], # Cache "not found" and "server busy" responses
6369
**kw,
6470
)
6571

72+
6673
def get_session() -> requests_cache.CachedSession:
6774
"""
6875
Gets the current cached session, initializing it with defaults if necessary.
@@ -80,6 +87,7 @@ def get_session() -> requests_cache.CachedSession:
8087

8188
# --- Core Fetching Logic ---
8289

90+
8391
def _execute_fetch(url: str) -> requests.Response:
8492
"""
8593
Executes a single GET request using the global session.
@@ -91,7 +99,10 @@ def _execute_fetch(url: str) -> requests.Response:
9199
"""
92100
return get_session().get(url, timeout=REQUEST_TIMEOUT)
93101

94-
def _fetch_with_ratelimit_and_retry(url: str) -> dict[str, Any] | list[Any] | str | None:
102+
103+
def _fetch_with_ratelimit_and_retry(
104+
url: str,
105+
) -> dict[str, Any] | list[Any] | str | None:
95106
"""
96107
Performs a GET request with rate-limiting and exponential backoff retry logic.
97108
@@ -135,23 +146,35 @@ def _fetch_with_ratelimit_and_retry(url: str) -> dict[str, Any] | list[Any] | st
135146
return None # Resource not found is a valid, final state.
136147

137148
if resp.status_code == 503:
138-
print(f"[ChemInformant] 503 Server Busy -> retry in {backoff:.1f}s", file=sys.stderr)
149+
print(
150+
f"[ChemInformant] 503 Server Busy -> retry in {backoff:.1f}s",
151+
file=sys.stderr,
152+
)
139153
else:
140154
resp.raise_for_status() # Trigger for other 4xx/5xx errors
141155

142156
except requests.exceptions.RequestException as e:
143-
print(f"[ChemInformant] Network error {e} -> retry in {backoff:.1f}s", file=sys.stderr)
157+
print(
158+
f"[ChemInformant] Network error {e} -> retry in {backoff:.1f}s",
159+
file=sys.stderr,
160+
)
144161

145162
time.sleep(backoff)
146-
backoff = min(MAX_BACKOFF, backoff * 2) + random.uniform(0, 1) # Exponential backoff with jitter
163+
backoff = min(MAX_BACKOFF, backoff * 2) + random.uniform(
164+
0, 1
165+
) # Exponential backoff with jitter
147166
retries += 1
148167

149-
print(f"[ChemInformant] Giving up after {MAX_RETRIES} retries for URL: {url}", file=sys.stderr)
168+
print(
169+
f"[ChemInformant] Giving up after {MAX_RETRIES} retries for URL: {url}",
170+
file=sys.stderr,
171+
)
150172
return None
151173

152174

153175
# --- Public-Facing Helper Functions ---
154176

177+
155178
def get_cids_by_name(name: str) -> list[int] | None:
156179
"""
157180
Fetches PubChem Compound IDs (CIDs) for a given chemical name.
@@ -175,10 +198,11 @@ def get_cids_by_name(name: str) -> list[int] | None:
175198
This function is used internally by get_properties() for name-to-CID resolution.
176199
End users should typically use get_properties() instead.
177200
"""
178-
url = f"{PUBCHEM_API_BASE}/compound/name/{quote(name)}/cids/JSON"
201+
url = f"{PUBCHEM_API_BASE}/compound/name/{quote(name)}/cids/JSON"
179202
data = _fetch_with_ratelimit_and_retry(url)
180203
return data.get("IdentifierList", {}).get("CID") if isinstance(data, dict) else None
181204

205+
182206
def get_cids_by_smiles(smiles: str) -> list[int] | None:
183207
"""
184208
Fetches PubChem Compound IDs (CIDs) for a given SMILES string.
@@ -204,11 +228,14 @@ def get_cids_by_smiles(smiles: str) -> list[int] | None:
204228
This function is used internally by get_properties() for SMILES-to-CID resolution.
205229
End users should typically use get_properties() instead.
206230
"""
207-
url = f"{PUBCHEM_API_BASE}/compound/smiles/{quote(smiles)}/cids/JSON"
231+
url = f"{PUBCHEM_API_BASE}/compound/smiles/{quote(smiles)}/cids/JSON"
208232
data = _fetch_with_ratelimit_and_retry(url)
209233
return data.get("IdentifierList", {}).get("CID") if isinstance(data, dict) else None
210234

211-
def get_batch_properties(cids: list[int], props: list[str]) -> dict[int, dict[str, Any]]:
235+
236+
def get_batch_properties(
237+
cids: list[int], props: list[str]
238+
) -> dict[int, dict[str, Any]]:
212239
"""
213240
Fetches multiple properties for a batch of CIDs in a single request,
214241
handling API pagination automatically.
@@ -258,7 +285,10 @@ def get_batch_properties(cids: list[int], props: list[str]) -> dict[int, dict[st
258285

259286
# Loop as long as the API provides a ListKey for the next page
260287
while list_key:
261-
print(f"[ChemInformant] Pagination detected, fetching next page with ListKey: {list_key}", file=sys.stderr)
288+
print(
289+
f"[ChemInformant] Pagination detected, fetching next page with ListKey: {list_key}",
290+
file=sys.stderr,
291+
)
262292
paginated_url = (
263293
f"{PUBCHEM_API_BASE}/compound/listkey/{list_key}"
264294
f"/property/{','.join(props)}/JSON"
@@ -307,7 +337,7 @@ def get_cas_for_cid(cid: int) -> str | None:
307337
It may be slower than standard property queries as it accesses
308338
detailed compound records rather than the property API.
309339
"""
310-
url = f"{PUG_VIEW_BASE}/compound/{cid}/JSON"
340+
url = f"{PUG_VIEW_BASE}/compound/{cid}/JSON"
311341
data = _fetch_with_ratelimit_and_retry(url)
312342
if isinstance(data, dict):
313343
for sec in data.get("Record", {}).get("Section", []):
@@ -317,11 +347,14 @@ def get_cas_for_cid(cid: int) -> str | None:
317347
for cas_sec in sub.get("Section", []):
318348
if cas_sec.get("TOCHeading") == "CAS":
319349
for info in cas_sec.get("Information", []):
320-
markup = info.get("Value", {}).get("StringWithMarkup")
350+
markup = info.get("Value", {}).get(
351+
"StringWithMarkup"
352+
)
321353
if markup and isinstance(markup, list) and markup:
322354
return markup[0].get("String")
323355
return None
324356

357+
325358
def get_synonyms_for_cid(cid: int) -> list[str]:
326359
"""
327360
Fetches all known synonyms (alternative names) for a given CID.
@@ -347,7 +380,7 @@ def get_synonyms_for_cid(cid: int) -> list[str]:
347380
This function is used internally by get_properties() and get_synonyms().
348381
The first synonym in the list is typically the preferred/most common name.
349382
"""
350-
url = f"{PUBCHEM_API_BASE}/compound/cid/{cid}/synonyms/JSON"
383+
url = f"{PUBCHEM_API_BASE}/compound/cid/{cid}/synonyms/JSON"
351384
data = _fetch_with_ratelimit_and_retry(url)
352385
if isinstance(data, dict):
353386
info_list = data.get("InformationList", {}).get("Information", [])

0 commit comments

Comments
 (0)