Commit 2f1e866

Author: Srikumar Sastry (committed)
Implementation of Cost Effective Active Learning
1 parent 6445b3a commit 2f1e866

File tree

1 file changed: 79 additions, 0 deletions

"""
This is a modified implementation of the Cost Effective Active Learning algorithm
(please refer to https://arxiv.org/abs/1701.03551). In each query round, this version
picks not only the top K most uncertain samples but also the top N most confident
samples, which may contribute informative and diverse examples. It improves on the
original implementation in that it does not require tuning a confidence threshold
parameter for every dataset.
"""

from keras.datasets import mnist
import numpy as np
from modAL.models import ActiveLearner
from sklearn.ensemble import RandomForestClassifier
from scipy.special import entr


# Load MNIST; scale pixel values to [0, 1] and flatten each 28x28 image to 784 features.
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train / 255
X_test = X_test / 255
y_train = y_train.astype(np.uint8)
y_test = y_test.astype(np.uint8)

X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

# Base classifier wrapped by the active learner.
model = RandomForestClassifier(n_estimators=100)

INITIAL_SET_SIZE = 32

# Treat the full training set as the unlabelled pool U.
U_x = np.copy(X_train)
U_y = np.copy(y_train)

# Draw the initial labelled set without replacement so no index is picked twice.
ind = np.random.choice(range(len(U_x)), size=INITIAL_SET_SIZE, replace=False)

X_initial = U_x[ind]
y_initial = U_y[ind]

U_x = np.delete(U_x, ind, axis=0)
U_y = np.delete(U_y, ind, axis=0)


def max_entropy(active_learner, X, K=16, N=16):
    # Entropy of the predicted class distribution for every sample in the pool.
    class_prob = active_learner.predict_proba(X)
    entropy = entr(class_prob).sum(axis=1)
    # Top K most uncertain (highest entropy) and top N most confident (lowest entropy) samples.
    uncertain_idx = np.argpartition(entropy, -K)[-K:]
    confidence_idx = np.argpartition(entropy, N)[:N]

    return np.concatenate((uncertain_idx, confidence_idx), axis=0)
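
# Illustrative sketch with hypothetical numbers: for entropies
# [0.1, 2.3, 0.05, 1.9] and K = N = 1, the strategy returns [1, 2] --
# the most uncertain sample (index 1) and the most confident one (index 2).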

# Seed the learner with the initial labelled set and use the combined
# uncertainty/confidence query strategy defined above.
active_learner = ActiveLearner(
    estimator=model,
    X_training=X_initial,
    y_training=y_initial,
    query_strategy=max_entropy
)

N_QUERIES = 20

K_MAX_ENTROPY = 16
N_MIN_ENTROPY = 16

# Test accuracy of the model trained on the initial set only.
scores = [active_learner.score(X_test, y_test)]

for index in range(N_QUERIES):

    # Ask the strategy for the K most uncertain and N most confident pool samples.
    query_idx, query_instance = active_learner.query(U_x, K_MAX_ENTROPY, N_MIN_ENTROPY)

    # Reveal the true labels of the queried samples and retrain the learner.
    active_learner.teach(U_x[query_idx], U_y[query_idx])

    # Remove the queried samples from the unlabelled pool.
    U_x = np.delete(U_x, query_idx, axis=0)
    U_y = np.delete(U_y, query_idx, axis=0)

    acc = active_learner.score(X_test, y_test)

    print(f'Query {index+1}: Test Accuracy: {acc}')

    scores.append(acc)
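
# A minimal follow-up sketch, assuming matplotlib is installed, to visualise the
# learning curve recorded in `scores` (initial score plus one point per query round).
import matplotlib.pyplot as plt

plt.plot(range(len(scores)), scores)
plt.xlabel('Query round')
plt.ylabel('Test accuracy')
plt.title('Modified CEAL on MNIST')
plt.show()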
