-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
270 lines (216 loc) · 13 KB
/
main.py
File metadata and controls
270 lines (216 loc) · 13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
from sklearn.model_selection import train_test_split
import numpy as np
from calibration.PCT.pct import ProbabilityCalibrationTree
from calibration.PS import PlattScaling
from calibration.IR import IsotonicRegression
from estimators import fit_estimator
import data_preparation
from data_preparation import prepare_dataset
from evaluation import score, ece_score_fixed_bins, probabilistic_count, auc_v_score, auc_v_relaxed_score, pde_score
import warnings
import pickle
import os
from tools import bins_number_to_bins_partition, scores_to_bins
import pandas as pd
import statistics
def print_progress_bar(iteration, total, prefix='Progress:', suffix='Complete', decimals=1, length=40, fill='█'):
    """
    Render a one-line terminal progress bar; call once per loop iteration.

    Each call rewrites the current line in place (leading '\r' plus
    end='\r'); a real newline is printed once iteration == total so that
    subsequent output starts on a fresh line.

    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
    """
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filled_length = int(length * iteration // total)
    bar = fill * filled_length + '-' * (length - filled_length)
    # flush=True: without it, a partial line (no '\n') can sit in the stdout
    # buffer and the bar never appears on block-buffered streams (e.g. pipes).
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end="\r", flush=True)
    # Print a newline on completion so later prints don't overwrite the bar.
    if iteration == total:
        print()
def scores_stat(values, title):
    """
    Print and return summary statistics for a collection of scores.

    :param values: sequence of at least two numbers
        (statistics.stdev raises StatisticsError on fewer than two values)
    :param title: label used in the printed summary line
    :return: tuple (mean, sd), each rounded to 3 decimal places
    """
    mean, sd = statistics.mean(values), statistics.stdev(values)
    # Print the unrounded values for the log; return rounded ones for the CSV row.
    print('{} stat: mean={}, sd={}'.format(title, mean, sd))
    return round(mean, 3), round(sd, 3)
def run_experiments(estimator, calibrator, dataset=data_preparation.DATASETS_LIST,
                    experiments_iteration=10):
    """
    Run the calibration benchmark for every (dataset, estimator, calibrator)
    combination and append one CSV row of PDE statistics per combination to
    log.csv (the file is rewritten with a header at the start of each call).

    For each combination, `experiments_iteration` independent splits are drawn.
    Fitted estimators and calibrators are cached as pickles under
    ./saved_models/ keyed by dataset, split index, and model names, and are
    reloaded on later runs instead of being refit.

    :param estimator: one or a list of estimators name
    :param calibrator: one or a list of calibrators name,
        possible calibrators are {
            'PS', 'IR',
            'PCT ?',
            'PCT-32BINS ?'
        }, in which ? is the regressor_type for class ProbabilityCalibrationTree
        (available in ProbabilityCalibrationTree.__init__ doc)
    :param dataset: one or a list of datasets name
    :param experiments_iteration: number of train/test splits per combination
    :return: None; results are printed and appended to log.csv
    """
    # NOTE(review): PCT_TRAIN_PROPORTION is never referenced below — the PCT
    # train share is implicitly whatever remains of X_rem after the base split.
    # Also, BASE_TRAIN_PROPORTION is applied to X_rem (90% of the data), so the
    # base train set is ~40.5% of the full data, not 45% — confirm intended.
    BASE_TRAIN_PROPORTION, PCT_TRAIN_PROPORTION, TEST_PROPORTION = .45, .45, .1
    AUC_V_EPS_LIST = [.005, .01, .02, .04, .06, .08, .1, .2, .3, .4, .5, .6, .7, .8, .9, 1.]
    LOG_FILE = 'log.csv'
    # Normalize the three arguments so each is always handled as a list.
    if isinstance(estimator, list):
        estimators_list = estimator
    else:
        estimators_list = [estimator]
    if isinstance(calibrator, list):
        calibrators_list = calibrator
    else:
        calibrators_list = [calibrator]
    if isinstance(dataset, list):
        datasets_list = dataset
    else:
        datasets_list = [dataset]
    # Overwrite the log with a fresh header row; result rows are appended below.
    pd.DataFrame([['Dataset', 'PDE -mean', 'PDE -sd']]).to_csv(LOG_FILE, index=False, header=False)
    for dataset in datasets_list:
        X, y, classes = prepare_dataset(dataset)
        for estimator in estimators_list:
            for calibrator in calibrators_list:
                # A bare 'PCT' / 'PCT-32BINS' name gets the default regressor appended,
                # so calibrator.split()[1] is always a regressor_type for PCT variants.
                if (calibrator.split()[0] == 'PCT' or calibrator.split()[0] == 'PCT-32BINS') \
                        and len(calibrator.split()) == 1:
                    calibrator = calibrator + ' ' + ProbabilityCalibrationTree.DEFAULT_REGRESSOR
                # The 'average' regressor ignores base-estimator scores entirely,
                # so a zero-score stand-in replaces the fitted estimator below.
                ignore_estimator = (calibrator == 'PCT average' or calibrator == 'PCT-32BINS average')
                print('experiment setup: e: {}, c: {}, d: {}'.format(estimator, calibrator, dataset))
                # Per-split score accumulators for this combination.
                rmse_scores, closs_scores, auc_scores, ece_scores = [], [], [], []
                # ece_f_scores = []
                pde_scores = []
                test_tree_n_bins, test_tree_probabilistic_counts = [], []
                train_tree_n_bins, train_tree_probabilistic_counts = [], []
                # One list of per-split scores per epsilon value.
                auc_v_scores_2d = [[] for _ in range(len(AUC_V_EPS_LIST))]
                auc_v_relaxed_scores_2d = [[] for _ in range(len(AUC_V_EPS_LIST))]
                for experiment_i in range(experiments_iteration):
                    # random_state=experiment_i makes each split reproducible and
                    # keeps it aligned with the pickle cache keyed on experiment_i.
                    X_rem, X_test, y_rem, y_test = train_test_split(X, y, test_size=TEST_PROPORTION,
                                                                    shuffle=True, random_state=experiment_i)
                    # shuffle=False is fine here: X_rem was already shuffled above.
                    X_base_train, X_PCT_train, y_base_train, y_PCT_train = \
                        train_test_split(X_rem, y_rem, train_size=BASE_TRAIN_PROPORTION, shuffle=False)
                    # loading the estimator
                    if ignore_estimator:
                        # Constant-zero scores: the calibrator sees only features.
                        estimator_model = lambda X: np.zeros(X.shape[0])
                    else:
                        estimator_adr = './saved_models/estimators/{}/{}_{}.pickle'.format(dataset, experiment_i, estimator)
                        # Load the cached estimator if present, otherwise fit and cache it.
                        # (pickle.load of a trusted local cache; unsafe on untrusted files.)
                        try:
                            with open(estimator_adr, 'rb') as f:
                                estimator_models_obj = pickle.load(f)
                        except FileNotFoundError:
                            estimator_models_obj = fit_estimator(X_base_train, y_base_train, classes, estimator)
                            os.makedirs(os.path.dirname(estimator_adr), exist_ok=True)
                            with open(estimator_adr, 'wb') as f:
                                pickle.dump(estimator_models_obj, f)
                        # One fitted model per class; stack their outputs column-wise.
                        estimator_model = lambda X: np.array([model(X) for model in estimator_models_obj]).T
                    S_PCT_train = estimator_model(X_PCT_train)
                    S_test = estimator_model(X_test)
                    calibrator_adr = './saved_models/calibrators/{}/{}_{}_{}.pickle'.format(
                        dataset, experiment_i, estimator, calibrator.replace(' ', '-'))
                    if calibrator.split()[0] == 'PCT' or calibrator.split()[0] == 'PCT-32BINS':
                        # Load the cached PCT calibrator if present, otherwise fit and cache.
                        try:
                            with open(calibrator_adr, 'rb') as f:
                                calibrator_model = pickle.load(f)
                        except FileNotFoundError:
                            regressor_type = calibrator.split()[1]
                            # PCT-32BINS: unpruned tree grown to exactly 32 test leaves;
                            # plain PCT: pruned tree, leaf count unconstrained.
                            if calibrator.split()[0] == 'PCT-32BINS':
                                be_pruned = False
                                n_required_test_leaves = 32
                            else:
                                be_pruned = True
                                n_required_test_leaves = None
                            calibrator_model = ProbabilityCalibrationTree(classes,
                                                                          be_pruned=be_pruned,
                                                                          n_required_test_leaves=n_required_test_leaves,
                                                                          regressor_type=regressor_type)
                            calibrator_model.fit(X_PCT_train, S_PCT_train, y_PCT_train)
                            os.makedirs(os.path.dirname(calibrator_adr), exist_ok=True)
                            with open(calibrator_adr, 'wb') as f:
                                pickle.dump(calibrator_model, f)
                        # leaves_test (leaf index per test sample) is only bound in this
                        # branch; the PDE block below is guarded accordingly.
                        preds_test, leaves_test = calibrator_model.predict_proba_w_leaves(X_test, S_test)
                    elif calibrator.split()[0] == 'PS' or calibrator.split()[0] == 'IR':
                        calibrator_class = PlattScaling if calibrator.split()[0] == 'PS' else IsotonicRegression
                        try:
                            with open(calibrator_adr, 'rb') as f:
                                calibrator_model = pickle.load(f)
                        except FileNotFoundError:
                            calibrator_model = calibrator_class(classes)
                            calibrator_model.fit(S_PCT_train, y_PCT_train)
                            os.makedirs(os.path.dirname(calibrator_adr), exist_ok=True)
                            with open(calibrator_adr, 'wb') as f:
                                pickle.dump(calibrator_model, f)
                        preds_test = calibrator_model.predict_proba(S_test)
                    else:
                        raise Exception('calibration method {} not found'.format(calibrator))
                    # Score this split. Only PDE currently reaches the log; the rest are
                    # accumulated but their stat lines are commented out below.
                    rmse_scores.append(score(preds_test, y_test, classes, 'rmse'))
                    closs_scores.append(score(preds_test, y_test, classes, 'classification'))
                    auc_scores.append(score(preds_test, y_test, classes, 'auc'))
                    ece_scores.append(score(preds_test, y_test, classes, 'ece32'))
                    bins = scores_to_bins(preds_test)
                    for i, eps in enumerate(AUC_V_EPS_LIST):
                        auc_v = auc_v_score(preds_test, y_test, bins, classes, eps)
                        auc_v_scores_2d[i].append(auc_v)
                        auc_v_relaxed = auc_v_relaxed_score(preds_test, y_test, classes, eps)
                        auc_v_relaxed_scores_2d[i].append(auc_v_relaxed)
                    # NOTE(review): this reassignment of `bins` only fed the commented-out
                    # ECE-fixed-bins / bin-count bookkeeping below, so it is currently dead.
                    if calibrator[:3] == 'PCT' and len(calibrator.split()) > 1 and calibrator.split()[1] == 'average':
                        bins = bins_number_to_bins_partition(leaves_test)
                        # ece_f_scores.append(ece_score_fixed_bins(preds_test, y_test, bins, classes))
                        ### PC and actual count of bins
                        # test_tree_n_bins.append(len(bins))
                        # test_tree_probabilistic_counts.append(probabilistic_count(bins))
                        #
                        # _, leaves_train = calibrator_model.predict_proba_w_leaves(X_PCT_train, S_PCT_train)
                        # train_tree_n_bins.append(len(np.unique(leaves_train)))
                        # train_cells = bins_number_to_bins_partition(leaves_train)
                        # train_tree_probabilistic_counts.append(probabilistic_count(train_cells))
                    if calibrator[:3] == 'PCT':
                        # PDE can be used for any method that generates bins even though the elements in a bin may have different scores
                        bins = bins_number_to_bins_partition(leaves_test)
                        pde_scores.append(pde_score(preds_test, y_test, bins, classes))
                    print_progress_bar(experiment_i+1, experiments_iteration)
                ### SCORES STAT and LOG
                to_write_row = [dataset]
                # to_write_row.extend(scores_stat(rmse_scores, 'RMSE'))
                # to_write_row.extend(scores_stat(closs_scores, 'Classification loss'))
                # to_write_row.extend(scores_stat(auc_scores, 'AUC'))
                # if None in ece_scores:
                #     print('ECE 32 ::\nno sufficient samples to split into the required bins')
                #     to_write_row.extend(['None', 'None'])
                # else:
                #     to_write_row.extend(scores_stat(ece_scores, 'ECE 32'))
                # pde_scores is empty for non-PCT calibrators, so their rows hold
                # only the dataset name.
                if len(pde_scores) > 0:
                    to_write_row.extend(scores_stat(pde_scores, 'PDE'))
                # if calibrator[:3] == 'PCT' and len(calibrator.split()) > 1 and calibrator.split()[1] == 'average':
                    # the methods that generates bins in each of which elements have the same scores
                    # to_write_row.extend(scores_stat(ece_f_scores, 'ECE FIXED BINS'))
                    # print('\ntest n_bins in average: {}'.format(np.mean(test_tree_n_bins)))
                    # print('test PC in average: {}'.format(np.mean(test_tree_probabilistic_counts)))
                    #
                    # print('train n_bins in average: {}'.format(np.mean(train_tree_n_bins)))
                    # print('train PC in average: {}'.format(np.mean(train_tree_probabilistic_counts)))
                # for i, eps in enumerate(AUC_V_EPS_LIST):
                #     avg, _ = scores_stat(auc_v_scores_2d[i], 'AUC_v eps={}'.format(eps))
                #
                # for i, eps in enumerate(AUC_V_EPS_LIST):
                #     avg, _ = scores_stat(auc_v_relaxed_scores_2d[i], 'AUC_v_relaxed eps={}'.format(eps))
                pd.DataFrame([to_write_row]).to_csv(LOG_FILE, mode='a', index=False, header=False)
                print('--------------------------------------')
# TODO: remove ece_fixed_bins. since in the cases that ece_fixed_bins is applicable, it is equal to PDE
if __name__ == '__main__':
    # Suppress library warnings so they don't interleave with the progress bar.
    warnings.filterwarnings("ignore")
    # Benchmark the 'svm' base estimator with the 32-leaf averaging PCT
    # calibrator; datasets default to data_preparation.DATASETS_LIST.
    run_experiments('svm', ['PCT-32BINS average'])