Skip to content

Commit 1673edf

Browse files
committed
Switch to pytest-benchmark
1 parent fc6bfef commit 1673edf

File tree

3 files changed

+250
-137
lines changed

3 files changed

+250
-137
lines changed

benchmark.py

Lines changed: 0 additions & 137 deletions
This file was deleted.

pytest-benchmark.ini

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
[tool:pytest-benchmark]
2+
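# Benchmark configuration for ChemInformant.
# NOTE(review): pytest-benchmark is normally configured through its --benchmark-*
# command-line flags (e.g. via addopts in pytest.ini); confirm this section is actually read.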
3+
min_rounds = 3
4+
max_time = 30.0
5+
min_time = 0.1
6+
timer = time.perf_counter
7+
disable_gc = true
8+
warmup = true
9+
warmup_iterations = 2
10+
11+
# Output options
12+
sort = mean
13+
columns = min,max,mean,stddev,rounds,iterations
14+
histogram = true
15+
16+
# Save results
17+
save = .benchmarks
18+
save-data = true
19+
autosave = true
20+
21+
# Compare with previous runs
22+
compare = 0001
23+
compare-fail = mean:10%

tests/test_benchmarks.py

Lines changed: 227 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
"""
2+
Benchmark tests for ChemInformant performance using pytest-benchmark.
3+
4+
This module replaces the standalone benchmark.py script with integrated
5+
pytest benchmarks that can be run as part of the standard test suite.
6+
"""
7+
8+
import statistics
9+
import time
10+
11+
import pytest
12+
13+
import ChemInformant as ci
14+
15+
# Sample drug names for benchmarking
16+
# 50 distinct drug names, five per row. The batch benchmarks slice this list
# ([:5], [:10], [:25], and the full list), so the order matters and every
# entry must be unique for the "N compounds" labels to be accurate.
SAMPLE_DRUGS = [
    "aspirin", "caffeine", "acetaminophen", "ibuprofen", "naproxen",
    "diclofenac", "celecoxib", "meloxicam", "indomethacin", "piroxicam",
    "amoxicillin", "azithromycin", "ciprofloxacin", "doxycycline", "penicillin",
    "atorvastatin", "simvastatin", "rosuvastatin", "pravastatin", "lovastatin",
    "metformin", "insulin", "glipizide", "glyburide", "pioglitazone",
    "lisinopril", "enalapril", "losartan", "valsartan", "amlodipine",
    "metoprolol", "atenolol", "propranolol", "carvedilol", "bisoprolol",
    # "apixaban" replaces a second "aspirin" that left only 49 unique names.
    "warfarin", "heparin", "clopidogrel", "apixaban", "rivaroxaban",
    "omeprazole", "lansoprazole", "pantoprazole", "esomeprazole", "ranitidine",
    "fluoxetine", "sertraline", "paroxetine", "citalopram", "escitalopram",
]
28+
29+
# Property names requested by the batch benchmarks. Order matters: the
# smaller benchmarks take prefixes of this list ([:3], [:4]).
BENCHMARK_PROPERTIES = [
    "molecular_weight", "xlogp", "cas",
    "iupac_name", "canonical_smiles", "molecular_formula",
]
37+
38+
39+
class TestPerformanceBenchmarks:
    """Timing benchmarks for the main ChemInformant lookup entry points.

    Each test hands a zero-argument callable to the ``benchmark`` fixture and
    then sanity-checks the value that callable returned.
    """

    @pytest.mark.benchmark(group="single_compound")
    def test_single_compound_lookup(self, benchmark):
        """Time a two-property lookup for the single name "aspirin"."""
        frame = benchmark(
            lambda: ci.get_properties(["aspirin"], ["molecular_weight", "xlogp"])
        )
        assert len(frame) == 1
        assert frame.iloc[0]["status"] == "OK"

    @pytest.mark.benchmark(group="batch_processing")
    def test_small_batch_processing(self, benchmark):
        """Time a batch lookup of the first 10 sample drugs, 3 properties."""
        names = SAMPLE_DRUGS[:10]
        frame = benchmark(
            lambda: ci.get_properties(names, BENCHMARK_PROPERTIES[:3])
        )
        # Only an upper bound is checked because some compounds might fail.
        assert len(frame) <= len(names)

    @pytest.mark.benchmark(group="batch_processing")
    def test_medium_batch_processing(self, benchmark):
        """Time a batch lookup of the first 25 sample drugs, 4 properties."""
        names = SAMPLE_DRUGS[:25]
        frame = benchmark(
            lambda: ci.get_properties(names, BENCHMARK_PROPERTIES[:4])
        )
        assert len(frame) <= len(names)

    @pytest.mark.benchmark(group="batch_processing")
    def test_large_batch_processing(self, benchmark):
        """Time a batch lookup of the full sample list with every property."""
        names = SAMPLE_DRUGS
        frame = benchmark(
            lambda: ci.get_properties(names, BENCHMARK_PROPERTIES)
        )
        assert len(frame) <= len(names)

    @pytest.mark.benchmark(group="caching")
    def test_cache_performance(self, benchmark):
        """Time a repeated lookup after one identical warm-up call."""
        warm_names = SAMPLE_DRUGS[:5]

        # Warm-up call made outside the timed section, intended to populate
        # the cache so the benchmarked calls below are cache hits.
        ci.get_properties(warm_names, ["molecular_weight"])

        frame = benchmark(
            lambda: ci.get_properties(warm_names, ["molecular_weight"])
        )
        assert len(frame) <= len(warm_names)

    @pytest.mark.benchmark(group="convenience_api")
    def test_convenience_functions(self, benchmark):
        """Time ``ci.get_weight`` called once per name for 10 sample drugs."""

        def gather_weights():
            weights = []
            for name in SAMPLE_DRUGS[:10]:
                try:
                    value = ci.get_weight(name)
                except Exception:
                    # Best-effort: a failed lookup is skipped, not fatal.
                    continue
                if value is not None:
                    weights.append(value)
            return weights

        collected = benchmark(gather_weights)
        assert isinstance(collected, list)

    @pytest.mark.benchmark(group="mixed_identifiers")
    def test_mixed_identifier_types(self, benchmark):
        """Time one lookup whose inputs mix names, integer CIDs and a SMILES."""
        identifiers = [
            "aspirin",                    # name
            2244,                         # CID
            "CC(=O)OC1=CC=CC=C1C(=O)O",   # SMILES
            "caffeine",                   # name
            2519,                         # CID
        ]
        frame = benchmark(
            lambda: ci.get_properties(identifiers, ["molecular_weight", "cas"])
        )
        assert len(frame) <= len(identifiers)

    @pytest.mark.benchmark(group="error_handling")
    def test_error_handling_performance(self, benchmark):
        """Time a lookup where valid inputs are mixed with invalid ones."""
        identifiers = [
            "invalid_compound_name_12345",
            "another_fake_compound",
            "aspirin",      # valid one
            999999999,      # invalid CID
            "caffeine",     # valid one
        ]
        frame = benchmark(
            lambda: ci.get_properties(identifiers, ["molecular_weight"])
        )
        # Never more rows than inputs; some inputs are expected to fail.
        assert len(frame) <= len(identifiers)
150+
151+
152+
@pytest.mark.benchmark(group="comparison")
class TestComparisonBenchmarks:
    """Benchmarks that set one calling pattern against another."""

    def test_batch_vs_individual_calls(self, benchmark):
        """Time one-compound-per-call lookups and compare with a batch call.

        Only the per-compound loop is benchmarked; the batch call is made
        once afterwards, untimed, as a reference for the result count.
        """
        names = SAMPLE_DRUGS[:10]
        wanted = ["molecular_weight", "xlogp"]

        def one_at_a_time():
            rows = []
            for name in names:
                try:
                    frame = ci.get_properties([name], wanted)
                    if frame.empty:
                        continue
                    rows.append(frame.iloc[0])
                except Exception:
                    # Best-effort: a failing compound is simply left out.
                    continue
            return rows

        per_compound_rows = benchmark(one_at_a_time)

        # Reference batch call (not benchmarked).
        batch_frame = ci.get_properties(names, wanted)

        # The two approaches should yield a similar number of results;
        # a slack of 2 allows for some variance.
        assert len(per_compound_rows) <= len(batch_frame) + 2
180+
181+
182+
# Utility functions for benchmark analysis
183+
def analyze_benchmark_results(benchmark_results):
    """Summarize a collection of benchmark timings.

    Args:
        benchmark_results: Iterable of numeric timings. Sequences as well as
            one-shot iterables (generators, iterators) are accepted, and
            ``None`` is treated the same as an empty collection.

    Returns:
        An empty dict when there are no timings. Otherwise a dict with the
        keys ``"min_time"``, ``"max_time"``, ``"mean_time"``,
        ``"median_time"`` and ``"std_dev"``. ``"std_dev"`` is the sample
        standard deviation, or ``0`` when only one timing is available.
    """
    if benchmark_results is None:
        return {}

    # Materialize once: the statistics below make several passes over the
    # data, which would exhaust a generator after the first pass (and a
    # generator is always truthy, so an emptiness check on it is useless).
    samples = list(benchmark_results)
    if not samples:
        return {}

    return {
        "min_time": min(samples),
        "max_time": max(samples),
        "mean_time": statistics.mean(samples),
        "median_time": statistics.median(samples),
        # stdev() needs at least two data points.
        "std_dev": statistics.stdev(samples) if len(samples) > 1 else 0,
    }
195+
196+
197+
# Custom benchmark fixtures
198+
@pytest.fixture
def fresh_cache():
    """Placeholder fixture for tests that want to start from a fresh cache.

    Nothing is cleared yet: this would clear the cache if a public
    cache-clearing method were available. For now it only yields.
    """
    # Setup: nothing to do for now.
    yield None
    # Teardown: cleanup would go here if needed.
205+
206+
207+
# Performance thresholds (can be adjusted based on system performance)
208+
# Upper bounds, in seconds, for the regression checks. These can be adjusted
# based on system performance.
PERFORMANCE_THRESHOLDS = dict(
    single_compound_max_time=5.0,
    batch_10_max_time=10.0,
    cache_hit_max_time=1.0,
)
213+
214+
215+
def test_performance_thresholds():
    """Check that a single-compound lookup stays under its time threshold.

    A simple performance regression test: one ``ci.get_properties`` call for
    "aspirin" is timed and compared against
    ``PERFORMANCE_THRESHOLDS["single_compound_max_time"]``; the returned
    result is then sanity-checked.
    """
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval cannot be skewed by system clock adjustments as it could be
    # with time.time().
    started = time.perf_counter()
    result = ci.get_properties(["aspirin"], ["molecular_weight"])
    elapsed = time.perf_counter() - started

    assert elapsed < PERFORMANCE_THRESHOLDS["single_compound_max_time"], (
        f"Single compound lookup took {elapsed:.2f}s, exceeds threshold"
    )

    assert len(result) == 1
    assert result.iloc[0]["status"] == "OK"

0 commit comments

Comments
 (0)