1
+ """
2
+ This is a modified implementation of the algorithm Cost Effective Active Learning
3
+ (Pl. refer - https://arxiv.org/abs/1701.03551). This version not only picks up the
4
+ top K uncertain samples but also picks up the top N highly confident samples that
5
+ may represent information and diversity. It is better than the original implementation
6
+ as it does not involve tuning the confidence threshold parameter for every dataset.
7
+ """
8
+
9
+ from keras .datasets import mnist
10
+ import numpy as np
11
+ from modAL .models import ActiveLearner
12
+ from sklearn .ensemble import RandomForestClassifier
13
+ from scipy .special import entr
14
+
15
+
16
# Load MNIST and scale pixel intensities into [0, 1].
(X_train, y_train), (X_test, y_test) = mnist.load_data()

X_train = X_train / 255
X_test = X_test / 255
y_train = y_train.astype(np.uint8)
y_test = y_test.astype(np.uint8)

# Flatten each 28x28 image into a 784-dimensional feature vector.
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)

model = RandomForestClassifier(n_estimators=100)

INITIAL_SET_SIZE = 32

# U_x / U_y form the unlabelled pool; copies so the originals stay intact.
U_x = np.copy(X_train)
U_y = np.copy(y_train)

# Draw the initial labelled seed WITHOUT replacement. The previous call
# used np.random.choice's default replace=True, which can pick the same
# sample more than once: the effective seed set is then smaller than
# INITIAL_SET_SIZE and np.delete removes fewer unique rows than intended.
ind = np.random.choice(len(U_x), size=INITIAL_SET_SIZE, replace=False)

X_initial = U_x[ind]
y_initial = U_y[ind]

# Remove the seeded samples from the unlabelled pool.
U_x = np.delete(U_x, ind, axis=0)
U_y = np.delete(U_y, ind, axis=0)
42
def max_entropy(active_learner, X, K=16, N=16):
    """Query strategy: select the K most-uncertain and N most-confident samples.

    Uncertainty is the Shannon entropy of the learner's predicted class
    probabilities. The K highest-entropy samples carry the most information
    about the decision boundary, while the N lowest-entropy samples act as
    high-confidence pseudo-representative points that add diversity
    (cf. Cost-Effective Active Learning, https://arxiv.org/abs/1701.03551).

    Args:
        active_learner: estimator exposing ``predict_proba(X)``.
        X: candidate pool, shape (n_samples, n_features).
        K: number of top-uncertainty samples to select.
        N: number of top-confidence samples to select.

    Returns:
        1-D array of K + N distinct row indices into X; the first K are the
        most uncertain, the last N the most confident.
    """
    class_prob = active_learner.predict_proba(X)
    # entr(p) = -p * log(p); summing over the class axis yields the
    # Shannon entropy of each sample's predicted distribution.
    entropy = entr(class_prob).sum(axis=1)

    # A single argsort guarantees the two index sets are disjoint even when
    # entropies tie. The original issued two independent argpartition calls,
    # which can return overlapping indices on ties, so the same sample could
    # be selected (and taught) twice in one query.
    order = np.argsort(entropy)
    uncertain_idx = order[-K:]
    confidence_idx = order[:N]

    return np.concatenate((uncertain_idx, confidence_idx), axis=0)
50
+
51
+
52
# Wrap the random forest in a modAL ActiveLearner, seeded with the initial
# labelled set and driven by the max_entropy query strategy.
active_learner = ActiveLearner(
    estimator=model,
    X_training=X_initial,
    y_training=y_initial,
    query_strategy=max_entropy
)

N_QUERIES = 20

K_MAX_ENTROPY = 16
N_MIN_ENTROPY = 16

# Track test accuracy, starting with the model trained on the seed alone.
scores = [active_learner.score(X_test, y_test)]

for query_round in range(N_QUERIES):
    # Ask the strategy for the next batch of pool indices; extra positional
    # arguments are forwarded to max_entropy as K and N.
    query_idx, query_instance = active_learner.query(
        U_x, K_MAX_ENTROPY, N_MIN_ENTROPY
    )

    # Reveal the labels of the selected samples and retrain.
    active_learner.teach(U_x[query_idx], U_y[query_idx])

    # Remove the newly-labelled samples from the unlabelled pool.
    U_x = np.delete(U_x, query_idx, axis=0)
    U_y = np.delete(U_y, query_idx, axis=0)

    test_acc = active_learner.score(X_test, y_test)
    print(f'Query {query_round + 1}: Test Accuracy: {test_acc}')
    scores.append(test_acc)