
Commit fc23221

remove pdm.lock file and update pyproject.toml to use hatchling as the build backend; increment version to 0.2.0 and reorganize optional dependencies
1 parent 6e2c817 commit fc23221
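For context, here is a minimal sketch of the pyproject.toml changes the commit message describes, assuming hatchling's standard build-backend configuration. Apart from the hatchling backend, the 0.2.0 version bump, and the existence of reorganized optional-dependency groups, everything below (project metadata, group names, dependency names) is an illustrative assumption, not the actual file contents.

# Hypothetical pyproject.toml excerpt; group and dependency names are assumed
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "splinator"      # assumed project name
version = "0.2.0"       # version bump noted in the commit message

[project.optional-dependencies]
dev = ["pytest"]        # assumed group
examples = ["pandas"]   # assumed group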

File tree

4 files changed: +170 −1,850 lines

examples/calibration_comparison.py

Lines changed: 152 additions & 0 deletions
"""
Comprehensive comparison of Splinator vs scikit-learn calibration methods
"""
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import calibration_curve
from sklearn.metrics import brier_score_loss, log_loss
from splinator.estimators import LinearSplineLogisticRegression
from splinator.metrics import expected_calibration_error, spiegelhalters_z_statistic
import time
import warnings

warnings.filterwarnings('ignore')

def evaluate_calibrator(y_true, y_pred_calibrated, method_name):
    """Evaluate calibration performance with multiple metrics"""
    results = {
        'method': method_name,
        'brier_score': brier_score_loss(y_true, y_pred_calibrated),
        'log_loss': log_loss(y_true, y_pred_calibrated),
        'ece': expected_calibration_error(y_true, y_pred_calibrated, n_bins=10),
        'spiegelhalter_z': abs(spiegelhalters_z_statistic(y_true, y_pred_calibrated))
    }
    return results

def run_comparison(n_samples=50000, n_features=20, n_knots=50):
    """Run comprehensive comparison"""
    print(f"\nRunning comparison with {n_samples} samples, {n_features} features, {n_knots} knots")

    # Generate data
    # Ensure n_informative + n_redundant < n_features
    n_informative = max(1, int(n_features * 0.6))  # 60% informative
    n_redundant = max(1, int(n_features * 0.2))    # 20% redundant

    X, y = make_classification(n_samples=n_samples, n_features=n_features,
                               n_informative=n_informative, n_redundant=n_redundant,
                               flip_y=0.1, random_state=42)

    # Split data
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    X_train, X_cal, y_train, y_cal = train_test_split(X_train, y_train, test_size=0.25, random_state=42)

    # Train base classifier
    clf = GradientBoostingClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Get uncalibrated predictions
    pred_cal = clf.predict_proba(X_cal)[:, 1]
    pred_test = clf.predict_proba(X_test)[:, 1]

    results = []

    # Uncalibrated baseline
    results.append(evaluate_calibrator(y_test, pred_test, 'Uncalibrated'))

    # 1. Sigmoid Calibration (Platt Scaling)
    start = time.time()
    lr = LogisticRegression()
    lr.fit(pred_cal.reshape(-1, 1), y_cal)
    lr_calibrated = lr.predict_proba(pred_test.reshape(-1, 1))[:, 1]
    sigmoid_time = time.time() - start
    result = evaluate_calibrator(y_test, lr_calibrated, 'Sigmoid (sklearn)')
    result['fit_time'] = sigmoid_time
    results.append(result)

    # 2. Isotonic Regression
    start = time.time()
    ir = IsotonicRegression(out_of_bounds='clip')
    ir.fit(pred_cal, y_cal)
    ir_calibrated = ir.predict(pred_test)
    isotonic_time = time.time() - start
    result = evaluate_calibrator(y_test, ir_calibrated, 'Isotonic (sklearn)')
    result['fit_time'] = isotonic_time
    results.append(result)

    # 3. Linear Spline Logistic Regression (Splinator)
    start = time.time()
    lslr = LinearSplineLogisticRegression(
        n_knots=n_knots,
        monotonicity="increasing",
        method='SLSQP',
        C=100,
        two_stage_fitting_initial_size=min(2000, len(y_cal))
    )
    lslr.fit(pred_cal.reshape(-1, 1), y_cal)
    lslr_calibrated = lslr.predict(pred_test.reshape(-1, 1))
    lslr_time = time.time() - start
    result = evaluate_calibrator(y_test, lslr_calibrated, f'LSLR (n_knots={n_knots})')
    result['fit_time'] = lslr_time
    results.append(result)

    # Additional LSLR configurations
    for n_k in [20, 100]:
        if n_k != n_knots:
            start = time.time()
            lslr2 = LinearSplineLogisticRegression(
                n_knots=n_k,
                monotonicity="increasing",
                method='SLSQP',
                C=100,
                two_stage_fitting_initial_size=min(2000, len(y_cal))
            )
            lslr2.fit(pred_cal.reshape(-1, 1), y_cal)
            lslr2_calibrated = lslr2.predict(pred_test.reshape(-1, 1))
            lslr2_time = time.time() - start
            result = evaluate_calibrator(y_test, lslr2_calibrated, f'LSLR (n_knots={n_k})')
            result['fit_time'] = lslr2_time
            results.append(result)

    return pd.DataFrame(results)

# Run comparisons with different dataset sizes
results_list = []

for n_samples in [10000, 50000]:
    for n_features in [10, 30]:
        df = run_comparison(n_samples=n_samples, n_features=n_features, n_knots=50)
        df['n_samples'] = n_samples
        df['n_features'] = n_features
        results_list.append(df)

# Combine all results
all_results = pd.concat(results_list, ignore_index=True)

# Display summary
print("\n" + "="*80)
print("CALIBRATION COMPARISON RESULTS")
print("="*80)
print("\nLower is better for: brier_score, log_loss, ece, spiegelhalter_z")
print("\nAverage performance across all experiments:")
summary = all_results.groupby('method')[['brier_score', 'log_loss', 'ece', 'spiegelhalter_z', 'fit_time']].mean()
print(summary.round(4))

print("\n" + "="*80)
print("RELATIVE PERFORMANCE (% improvement over Uncalibrated)")
print("="*80)

uncalibrated_scores = all_results[all_results['method'] == 'Uncalibrated'][['brier_score', 'log_loss', 'ece', 'spiegelhalter_z']].mean()
for method in ['Sigmoid (sklearn)', 'Isotonic (sklearn)', 'LSLR (n_knots=50)']:
    method_scores = all_results[all_results['method'] == method][['brier_score', 'log_loss', 'ece', 'spiegelhalter_z']].mean()
    improvement = (uncalibrated_scores - method_scores) / uncalibrated_scores * 100
    print(f"\n{method}:")
    for metric in improvement.index:
        print(f"  {metric}: {improvement[metric]:.1f}% improvement")

# Save detailed results
all_results.to_csv('calibration_comparison_results.csv', index=False)
print("\nDetailed results saved to 'calibration_comparison_results.csv'")
