
Commit 312594a

Add multiple editor classify code
- Updated routes.py, classify.py, and classify_request.py to accommodate multiple editor accounts at once.
- Corrected the model learning rate.
- Updated the classify_request result image.
1 parent 1950e96 commit 312594a

5 files changed (+90, −53 lines)

spambrainz/app/classify.py

Lines changed: 32 additions & 31 deletions
@@ -10,7 +10,6 @@

 # initialize constants used for the redis server
 EDITOR_QUEUE = "editor_queue"
-BATCH_SIZE = 1
 SERVER_SLEEP = 0.25
 CLIENT_SLEEP = 0.25

@@ -21,45 +20,47 @@ def string_to_datetime(string_dt):
     return datetime.datetime(*[int(v) for v in string_dt.replace('T', '-').replace(':', '-').split('-')])

 # function used to retrieve editor data from redis and store the results back
-def classify_process():
+def classify_process(size):

     print("* Loading model...")
     global model
     model = load_model('static/models/weights/current_lodbrok.h5')
     print("* Model loaded")

+    BATCH_SIZE = size
+
     # all the editor details are retrieved here from redis
     queue = db.lrange(EDITOR_QUEUE, 0, BATCH_SIZE - 1)
-    editorIDs = []
-
-    queue = json.loads(queue[0])
-    editorIDs.append(queue["id"])
-
-    # changing string datetimes to datetime objects
-    queue["birth_date"] = string_to_datetime(queue["birth_date"])
-    queue["member_since"] = string_to_datetime(queue["member_since"])
-    queue["email_confirm_date"] = string_to_datetime(queue["email_confirm_date"])
-    queue["last_updated"] = string_to_datetime(queue["last_updated"])
-    queue["last_login_date"] = string_to_datetime(queue["last_login_date"])
-
-    # preprocessing the given input to get a prediction
-    queue = preprocess_editor(queue)
-
-    # defining the structure
-    queue = np.array([queue])
-
-    # only data from index 1 onwards is considered while predicting,
-    # thus not taking the spam value into consideration
-    predict_data = {
-        "main_input": np.array(queue[:,1:10]),
-        "email_input": np.array(queue[:,10]),
-        "website_input": np.array(queue[:,11]),
-        "bio_input": np.array(queue[:,12:]),
-    }
-
-    # check to see if we need to process the batch
-    if len(editorIDs) > 0:
+    for q in queue:
+
+        q = json.loads(q)
+        editor_id = q["id"]
+
+        # changing string datetimes to datetime objects
+        q["birth_date"] = string_to_datetime(q["birth_date"])
+        q["member_since"] = string_to_datetime(q["member_since"])
+        q["email_confirm_date"] = string_to_datetime(q["email_confirm_date"])
+        q["last_updated"] = string_to_datetime(q["last_updated"])
+        q["last_login_date"] = string_to_datetime(q["last_login_date"])
+
+        # preprocessing the given input to get a prediction
+        q = preprocess_editor(q)
+
+        # defining the structure
+        q = np.array([q])
+
+        # only data from index 1 onwards is considered while predicting,
+        # thus not taking the spam value into consideration
+        predict_data = {
+            "main_input": np.array(q[:,1:10]),
+            "email_input": np.array(q[:,10]),
+            "website_input": np.array(q[:,11]),
+            "bio_input": np.array(q[:,12:]),
+        }
+
         result = model.predict(x = [
             predict_data["main_input"],
             predict_data["email_input"],
@@ -83,10 +84,10 @@ def classify_process():
         prediction = json.dumps(prediction)

         # storing the result in redis
-        db.set(str(editorIDs[0]), prediction)
-
-        # remove the set of editors from our queue
-        db.ltrim(EDITOR_QUEUE, len(editorIDs), -1)
+        db.set(str(editor_id), prediction)
+
+    # remove the processed editors from our queue
+    db.ltrim(EDITOR_QUEUE, size, -1)
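For reference, the queue handling above follows the standard redis-py list pattern: the producer rpushes JSON-encoded accounts, and the worker reads a batch with lrange, processes each item, and drops the batch with ltrim. A minimal self-contained sketch of that pattern (the connection settings and payloads here are illustrative, not taken from this repo, where the `db` client is configured elsewhere):

import json

import redis

# illustrative local connection; the app's db object is assumed to be set up elsewhere
db = redis.StrictRedis(host="localhost", port=6379, db=0)

EDITOR_QUEUE = "editor_queue"

# producer: push each editor account onto the tail of the list as JSON
db.rpush(EDITOR_QUEUE, json.dumps({"id": 1}), json.dumps({"id": 2}))

# consumer: read a batch of `size` items without removing them yet
size = 2
batch = [json.loads(item) for item in db.lrange(EDITOR_QUEUE, 0, size - 1)]

for editor in batch:
    # store a per-editor result under the editor's id, as classify_process does
    db.set(str(editor["id"]), json.dumps({"result": "not spam"}))

# finally trim the processed items off the head of the queue in one call
db.ltrim(EDITOR_QUEUE, size, -1)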

spambrainz/app/routes.py

Lines changed: 37 additions & 20 deletions
@@ -21,32 +21,49 @@ def predict():
     if flask.request.method == "POST":
         if flask.request.json:

-            editor_account = flask.request.json
+            editor_accounts = flask.request.json
+
+            # taking editor IDs to get the results from redis later
+            editor_ids = []
+
+            for key, editor_account in editor_accounts.items():
+                editor_ids.append(editor_account["id"])
+
+                # convert missing parts to None to be compatible with preprocessing
+                if editor_account["area"] == '':
+                    editor_account["area"] = None
+                if editor_account["bio"] == '':
+                    editor_account["bio"] = None
+
+            number_of_editors = len(editor_ids)

-            # taking the editor ID to get the results from redis
-            editor_id = editor_account["id"]
+            for i in range(0, number_of_editors):

-            # convert missing parts to None to be compatible with preprocessing
-            if editor_account["area"] == '':
-                editor_account["area"] = None
-            if editor_account["bio"] == '':
-                editor_account["bio"] = None
+                # the editor accounts are pushed into the redis queue
+                db.rpush(EDITOR_QUEUE, json.dumps(editor_accounts[str(i)]))

-            editor_account = dict(editor_account)
-
-            # the editor accounts are pushed into the redis queue
-            db.rpush(EDITOR_QUEUE, json.dumps(editor_account))

             # the classification model is called
-            classify_process()
+            classify_process(number_of_editors)

             # the finished classification is retrieved from redis
-            output = db.get(editor_id)
-            output = json.loads(output)
-            output["id"] = editor_id
-            if output is not None:
-                data["predictions"] = output
-            db.delete(editor_id)
+            if number_of_editors > 0:
+
+                data["predictions"] = {}
+
+                for editor_id in editor_ids:
+                    output = db.get(editor_id)
+                    output = json.loads(output)
+                    output["id"] = editor_id
+                    if output["result"] is not None:
+                        # add results under predictions
+                        data["predictions"][output["id"]] = output["result"]
+
+                    # remove the result from redis
+                    db.delete(editor_id)
+
             data["success"] = True

@@ -87,7 +104,7 @@ def train():

     # preprocessing the given data for the model to train on
     for i in range(0, number_of_editors):
-        print(int(editor_accounts[str(i)]['verdict']))
+        # print(int(editor_accounts[str(i)]['verdict']))
         preprocess_data[i] = preprocess_editor(editor_accounts[str(i)], int(editor_accounts[str(i)]['verdict']))

     # retraining the model with new data
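Given the route above, a round trip now sends an index-keyed dictionary of accounts and gets back one prediction per editor id. A rough sketch of the exchange (the account fields are abbreviated and the result format is illustrative; see classify_request.py below for a complete payload):

import requests

KERAS_REST_API_URL = "http://localhost:4321/predict"

# accounts keyed by stringified indices "0", "1", ..., since the route reads
# editor_accounts[str(i)]; fields are abbreviated here for illustration
payload = {
    "0": {"id": 1, "area": "", "bio": "spam text here"},
    "1": {"id": 2, "area": None, "bio": None},
}

r = requests.post(KERAS_REST_API_URL, json=payload).json()

# expected shape on success (values illustrative):
# {"predictions": {"1": <result>, "2": <result>}, "success": true}
if r.get("success"):
    for editor_id, result in r["predictions"].items():
        print(editor_id, result)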

spambrainz/app/train.py

Lines changed: 1 addition & 1 deletion
@@ -69,7 +69,7 @@ def retrain_model(training_data):

     # set the model optimizer learning rate to a smaller static value to
     # avoid catastrophic forgetting
-    m.optimizer.lr = 0.01
+    m.optimizer.lr = 0.001

     # saving the previous weights before training for future reference
     m.save('static/models/weights/previous_lodbrok.h5')
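One caveat worth flagging on this change: depending on the Keras/TensorFlow version in use, assigning to m.optimizer.lr directly may not update the optimizer's underlying variable. A more defensive sketch, assuming a tf.keras backend (the weights path is taken from the repo):

from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model

# load the current weights, as retrain_model does
m = load_model('static/models/weights/current_lodbrok.h5')

# set_value writes the optimizer's learning-rate variable in place, which
# behaves consistently across tf.keras versions
K.set_value(m.optimizer.lr, 0.001)

# equivalently, with TF2-style optimizers the variable can be assigned directly:
# m.optimizer.learning_rate.assign(0.001)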

spambrainz/classify_request.py

Lines changed: 20 additions & 1 deletion
@@ -10,7 +10,9 @@
1010
KERAS_REST_API_URL = "http://localhost:4321/predict"
1111

1212
# set spam editor account details to classify by the model
13-
editor_account = {
13+
editor_account = {}
14+
15+
editor_account[0]= {
1416
'id' : 1,
1517
'email': '[email protected]',
1618
'website': 'http://www.kisaiya.co.uk',
@@ -26,6 +28,23 @@
2628

2729
}
2830

31+
# add non spam editor account details
32+
33+
editor_account[1] = {
34+
'id': 2,
35+
'email': '[email protected]',
36+
'website': 'http://valeur-dachat.fr',
37+
'bio': 'enhance card gift leisure boost transfer detail layer mechanic gauge tomato repair rather infant laptop document wool december retreat behave sunset innocent what spray cake mother cart fall smart essay lyrics you observe battle film raccoon garment boss cook prize dumb police define outer shuffle glad engage stool chair recall depart use material yellow next life shoe print luxury isolate elegant civil bullet argue genuine swear allow unfold fortune region glory hour rule ',
38+
'area': None,
39+
'privs': 0,
40+
'gender': None,
41+
'birth_date': None,
42+
'member_since': datetime.datetime(2004, 10, 13, 11, 3, 46, 5).strftime('%Y-%m-%dT%H:%M:%S'),
43+
'email_confirm_date': datetime.datetime(2004, 10, 14, 9, 38, 45, 5).strftime('%Y-%m-%dT%H:%M:%S'),
44+
'last_updated': None,
45+
'last_login_date': None
46+
}
47+
2948
# submit the request to classify the given data by lodbrok model to /predict endpoint
3049
r = requests.post(KERAS_REST_API_URL,json = editor_account).json()
3150

classify_request result image (binary file, 2.61 KB; preview not included)
