-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtools.py
More file actions
258 lines (204 loc) · 7.67 KB
/
tools.py
File metadata and controls
258 lines (204 loc) · 7.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
from sklearn import tree
import matplotlib.pyplot as plt
import numpy as np
from sklearn import metrics
def plot_tree(dt_clf, attributes):
    """
    Render a fitted decision-tree classifier as a large matplotlib figure.

    :param dt_clf: fitted sklearn decision tree classifier
    :param attributes: numpy-indexable 2d array of (name, values) rows; all rows
        but the last supply feature names, the last row's second entry supplies
        the class names — assumes a numpy array (``[:-1, 0]`` indexing); confirm at call sites
    """
    plt.figure(figsize=(25, 20))
    tree.plot_tree(
        dt_clf,
        feature_names=attributes[:-1, 0],
        class_names=attributes[-1][1],
        filled=True,
    )
    plt.show()
def rmse_loss(P, Y):
    """
    Root mean squared error between probabilities and one-hot labels.

    :param P: probabilities, array-like of shape (n_samples, n_classes)
    :param Y: labels in one-hot format, array-like of shape (n_samples, n_classes)
    :return: scalar RMSE taken over every (sample, class) entry
    """
    squared_errors = np.square(P - Y)
    return np.sqrt(np.mean(squared_errors))
def auc(P, Y):
    """
    One-vs-rest area under the ROC curve, weighted by positive count per class.

    Classes with no positive sample are skipped (their ROC curve is undefined).

    :param P: probabilities, array-like of shape (n_samples, n_classes)
    :param Y: labels in one-hot format, array-like of shape (n_samples, n_classes)
    :return: weighted average AUC over the classes that have positives
    """
    weighted_auc = 0
    total_weight = 0
    for c in range(Y.shape[1]):
        labels = Y[:, c]
        weight = np.where(labels == 1)[0].shape[0]  # number of positives
        if weight == 0:
            continue
        fpr, tpr, _ = metrics.roc_curve(labels, P[:, c])
        weighted_auc += metrics.auc(fpr, tpr) * weight
        total_weight += weight
    return weighted_auc / total_weight
def classification_loss(P, Y):
    """
    Fraction of samples whose highest-probability class differs from the true class.

    :param P: probabilities, array-like of shape (n_samples, n_classes)
    :param Y: labels in one-hot format, array-like of shape (n_samples, n_classes)
    :return: misclassification rate in [0, 1]
    """
    mismatches = sum(
        1 for probs, label in zip(P, Y) if np.argmax(probs) != np.argmax(label)
    )
    return mismatches / len(P)
def pce_avg(partitions, norm=1):
    """
    Partition calibration error (PCE) for binary classification.

    Each bin's PCE is |sum of probs - number of positives| / bin size; bins are
    combined with an lp-norm weighted by bin size.

    :param partitions: 2d list of (p, y) pairs, one row per bin, where p is the
        predicted probability and y the true label (0 or 1)
    :param norm: p in lp-norm for PCE; must be a positive integer
    :return: weighted average of PCE of bins
    """
    assert norm >= 1 and norm % 1 == 0
    total_size = 0
    weighted_sum = 0.0
    for bin_samples in partitions:
        size = len(bin_samples)
        prob_total = sum(pair[0] for pair in bin_samples)
        positives = sum(pair[1] for pair in bin_samples)
        bin_pce = abs(prob_total - positives) / size
        weighted_sum += (bin_pce ** norm) * size
        total_size += size
    return (weighted_sum / total_size) ** (1 / norm)
def ppd_avg(partitions, norm=1):
"""
Partition probability deviation (PPD) for binary classification
PPD measures the average deviation of each score in a bin from their average label
:param partitions: 2d list of (p, y) pairs each of which represents the predicted probability and true label (0 or 1)
each row of partitions represents the samples of one bin
:param norm: p in lp-norm for PPD
:return: weighted average of PPD
"""
assert norm >= 1 and norm % 1 == 0
weight_sum = 0
weighted_ppd_sum = 0.0
for partition in partitions:
label_sum = 0 # = number of positive samples
for s in partition:
label_sum += s[1]
label_avg = label_sum / len(partition)
deviation_sum = 0
for s in partition:
prob = s[0]
deviation_sum += abs(prob - label_avg)
ppd = deviation_sum / len(partition)
weighted_ppd_sum += (ppd ** norm) * len(partition)
weight_sum += len(partition)
return (weighted_ppd_sum / weight_sum) ** (1 / norm)
def classwise_ece(P, Y, n_bins, norm=1):
    """
    Classwise expected calibration error with equal-frequency bins.

    For each class, the (probability, label) pairs are sorted by probability,
    split into n_bins roughly equal partitions, and scored with pce_avg; the
    per-class errors are averaged.

    :param P: probabilities, array-like of shape (n_samples, n_classes)
    :param Y: labels in one-hot format, array-like of shape (n_samples, n_classes)
    :param n_bins: number of bins samples are being partitioned to
    :param norm: p in lp-norm for PCE
    :return: PCE averaged over classes
    """
    ece_sum = 0
    n_classes = Y.shape[1]
    for class_i in range(n_classes):
        P_i = P[:, class_i]
        Y_i = Y[:, class_i]
        pairs = np.stack((P_i, Y_i), axis=1)
        # Sort whole (prob, label) rows by probability. The previous
        # np.sort(pairs, axis=0) sorted each column independently, which
        # decoupled every probability from its label and corrupted the bins.
        sorted_pairs = pairs[np.argsort(pairs[:, 0], kind="stable")]
        partitions = np.array_split(sorted_pairs, n_bins)
        ece_sum += pce_avg(partitions, norm)
    return ece_sum / n_classes
def classwise_ece_fixed_bins(P, Y, bins, norm=1):
    """
    Classwise expected calibration error with prefixed bins.

    :param P: probabilities, array-like of shape (n_samples, n_classes)
    :param Y: labels in one-hot format, array-like of shape (n_samples, n_classes)
    :param bins: mapping from bins to sample_ids, 2d array [b1:[sample_id, ...], b2, ...]
    :param norm: p in lp-norm for PCE
    :return: ECE averaged over classes
    """
    n_classes = Y.shape[1]
    ece_total = 0
    for class_i in range(n_classes):
        pairs = np.stack((P[:, class_i], Y[:, class_i]), axis=1)
        # Materialize each prefixed bin as a list of (prob, label) rows.
        partitions = [
            [pairs[sample_id] for sample_id in bin_ids] for bin_ids in bins
        ]
        ece_total += pce_avg(partitions, norm)
    return ece_total / n_classes
def classwise_pde(P, Y, bins, norm=1):
    """
    Classwise probability deviation error (with prefixed bins).

    Per class, measures the average deviation of each probability in a bin from
    the bin's average label (ppd_avg); the per-class values are then averaged.

    :param P: probabilities, array-like of shape (n_samples, n_classes)
    :param Y: labels in one-hot format, array-like of shape (n_samples, n_classes)
    :param bins: mapping from bins to sample_ids, 2d array [b1:[sample_id, ...], b2, ...]
    :param norm: p in lp-norm for PPD
    :return: PDE averaged over classes
    """
    n_classes = Y.shape[1]
    pde_total = 0
    for class_i in range(n_classes):
        pairs = np.stack((P[:, class_i], Y[:, class_i]), axis=1)
        # Materialize each prefixed bin as a list of (prob, label) rows.
        partitions = [
            [pairs[sample_id] for sample_id in bin_ids] for bin_ids in bins
        ]
        pde_total += ppd_avg(partitions, norm)
    return pde_total / n_classes
def bins_number_to_bins_partition(bins_number):
    """
    Invert a sample-to-bin assignment into a bin-to-samples partition.

    :param bins_number: mapping from sample id to bin id, array-like of shape (n_samples,)
    :return: list of sample-id lists, one per bin, ordered by first occurrence of each bin id
    """
    bin_to_samples = {}
    for sample_id, bin_id in enumerate(bins_number):
        bin_to_samples.setdefault(bin_id, []).append(sample_id)
    return list(bin_to_samples.values())
def scores_to_bins(P):
    """
    Create bins such that points with exactly the same probabilities for all
    classes land in the same bin.

    :param P: probabilities, array-like of shape (n_samples, n_classes)
    :return: bins: list of sample-id lists, one per distinct probability
        vector, ordered by first occurrence
    """
    # Key each sample on its exact probability vector. The dict preserves
    # first-seen order and replaces the previous O(n^2) linear scan over all
    # existing bins with an O(1) lookup per sample; it also works when rows
    # are plain lists (the old all(v == row) raised on a scalar bool there).
    vector_to_samples = {}
    for sample_id in range(len(P)):
        key = tuple(P[sample_id])
        if key in vector_to_samples:
            vector_to_samples[key].append(sample_id)
        else:
            vector_to_samples[key] = [sample_id]
    return list(vector_to_samples.values())
def labels_to_one_hot(y, classes):
    """
    Convert labels to one-hot vectors.

    :param y: labels, array-like of shape (n_samples,)
    :param classes: list (or array) of the distinct class values, in column order
    :return: Y: one-hot int array of shape (n_samples, n_classes); a row is
        left all-zero when its label does not occur in classes
    """
    # Coerce to an ndarray so `y[i] == classes` compares elementwise. With a
    # plain Python list (as the docstring allows) the old comparison was a
    # scalar-vs-list `==`, which is always False and left Y entirely zero.
    class_values = np.asarray(classes)
    Y = np.zeros((len(y), len(class_values)), dtype=int)
    for i in range(len(y)):
        Y[i, y[i] == class_values] = 1
    return Y