
Commit 312594a

Add multiple editor classify code
- Updated routes.py, classify.py, and classify_request.py to accommodate multiple editor accounts at once.
- Corrected the model learning rate.
- Updated the classify_request result image.
1 parent 1950e96 commit 312594a

5 files changed (+90, −53 lines)

spambrainz/app/classify.py

Lines changed: 32 additions & 31 deletions
@@ -10,7 +10,6 @@

 # initialize constants used for the redis server
 EDITOR_QUEUE = "editor_queue"
-BATCH_SIZE = 1
 SERVER_SLEEP = 0.25
 CLIENT_SLEEP = 0.25

@@ -21,45 +20,47 @@ def string_to_datetime(string_dt):
     return datetime.datetime(*[int(v) for v in string_dt.replace('T', '-').replace(':', '-').split('-')])

 # function used to retrieve editor data from redis and store the results back
-def classify_process():
+def classify_process(size):

     print("* Loading model...")
     global model
     model = load_model('static/models/weights/current_lodbrok.h5')
     print("* Model loaded")

+    BATCH_SIZE = size
+
     # all the editor details are retrieved here from redis
     queue = db.lrange(EDITOR_QUEUE, 0, BATCH_SIZE - 1)
-    editorIDs = []
-
-    queue = json.loads(queue[0])
-    editorIDs.append(queue["id"])
-
-    # changing string datetimes to datetime objects
-    queue["birth_date"] = string_to_datetime(queue["birth_date"])
-    queue["member_since"] = string_to_datetime(queue["member_since"])
-    queue["email_confirm_date"] = string_to_datetime(queue["email_confirm_date"])
-    queue["last_updated"] = string_to_datetime(queue["last_updated"])
-    queue["last_login_date"] = string_to_datetime(queue["last_login_date"])
-
-    # preprocessing the given input to get a prediction
-    queue = preprocess_editor(queue)
-
-    # defining the structure
-    queue = np.array([queue])
-
-    # only data from index 1 onwards is considered while predicting,
-    # thus not taking the spam value into consideration
-    predict_data = {
-        "main_input": np.array(queue[:,1:10]),
-        "email_input": np.array(queue[:,10]),
-        "website_input": np.array(queue[:,11]),
-        "bio_input": np.array(queue[:,12:]),
-    }
-
-    # check to see if we need to process the batch
-    if len(editorIDs) > 0:
+    for q in queue:
+
+        q = json.loads(q)
+        editor_id = q["id"]
+
+        # changing string datetimes to datetime objects
+        q["birth_date"] = string_to_datetime(q["birth_date"])
+        q["member_since"] = string_to_datetime(q["member_since"])
+        q["email_confirm_date"] = string_to_datetime(q["email_confirm_date"])
+        q["last_updated"] = string_to_datetime(q["last_updated"])
+        q["last_login_date"] = string_to_datetime(q["last_login_date"])
+
+        # preprocessing the given input to get a prediction
+        q = preprocess_editor(q)
+
+        # defining the structure
+        q = np.array([q])
+
+        # only data from index 1 onwards is considered while predicting,
+        # thus not taking the spam value into consideration
+        predict_data = {
+            "main_input": np.array(q[:,1:10]),
+            "email_input": np.array(q[:,10]),
+            "website_input": np.array(q[:,11]),
+            "bio_input": np.array(q[:,12:]),
+        }
+
         result = model.predict(x = [
             predict_data["main_input"],
             predict_data["email_input"],
@@ -83,10 +84,10 @@ def classify_process():
         prediction = json.dumps(prediction)

         # storing the result in redis
-        db.set(str(editorIDs[0]), prediction)
-
-        # remove the set of editors from our queue
-        db.ltrim(EDITOR_QUEUE, len(editorIDs), -1)
+        db.set(str(editor_id), prediction)
+
+    # remove the processed editors from our queue
+    db.ltrim(EDITOR_QUEUE, size, -1)
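For reference, the queue handling above follows the standard redis-py list pattern: the producer rpushes JSON-encoded accounts, and the worker reads a batch with lrange, processes each item, and drops the batch with ltrim. A minimal self-contained sketch of that pattern (the connection settings and payloads here are illustrative, not taken from this repo, where the `db` client is configured elsewhere):

import json

import redis

# illustrative local connection; the app's db object is assumed to be set up elsewhere
db = redis.StrictRedis(host="localhost", port=6379, db=0)

EDITOR_QUEUE = "editor_queue"

# producer: push each editor account onto the tail of the list as JSON
db.rpush(EDITOR_QUEUE, json.dumps({"id": 1}), json.dumps({"id": 2}))

# consumer: read a batch of `size` items without removing them yet
size = 2
batch = [json.loads(item) for item in db.lrange(EDITOR_QUEUE, 0, size - 1)]

for editor in batch:
    # store a per-editor result under the editor's id, as classify_process does
    db.set(str(editor["id"]), json.dumps({"result": "not spam"}))

# finally trim the processed items off the head of the queue in one call
db.ltrim(EDITOR_QUEUE, size, -1)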

spambrainz/app/routes.py

Lines changed: 37 additions & 20 deletions
@@ -21,32 +21,49 @@ def predict():
     if flask.request.method == "POST":
         if flask.request.json:

-            editor_account = flask.request.json
+            editor_accounts = flask.request.json
+
+            # taking editor IDs to get the results from redis later
+            editor_ids = []
+
+            for key, editor_account in editor_accounts.items():
+                editor_ids.append(editor_account["id"])
+
+                # convert missing parts to None to be compatible with preprocessing
+                if editor_account["area"] == '':
+                    editor_account["area"] = None
+                if editor_account["bio"] == '':
+                    editor_account["bio"] = None
+
+            number_of_editors = len(editor_ids)

-            # taking the editor ID to get the results from redis
-            editor_id = editor_account["id"]
+            for i in range(0, number_of_editors):

-            # convert missing parts to None to be compatible with preprocessing
-            if editor_account["area"] == '':
-                editor_account["area"] = None
-            if editor_account["bio"] == '':
-                editor_account["bio"] = None
+                # the editor accounts are pushed into the redis queue
+                db.rpush(EDITOR_QUEUE, json.dumps(editor_accounts[str(i)]))

-            editor_account = dict(editor_account)
-
-            # the editor accounts are pushed into the redis queue
-            db.rpush(EDITOR_QUEUE, json.dumps(editor_account))

             # the classification model is called
-            classify_process()
+            classify_process(number_of_editors)

             # the finished classification is retrieved from redis
-            output = db.get(editor_id)
-            output = json.loads(output)
-            output["id"] = editor_id
-            if output is not None:
-                data["predictions"] = output
-            db.delete(editor_id)
+            if number_of_editors > 0:
+
+                data["predictions"] = {}
+
+                for editor_id in editor_ids:
+                    output = db.get(editor_id)
+                    output = json.loads(output)
+                    output["id"] = editor_id
+                    if output["result"] is not None:
+                        # add results under predictions
+                        data["predictions"][output["id"]] = output["result"]
+
+                    # remove the result from redis
+                    db.delete(editor_id)
+
             data["success"] = True

@@ -87,7 +104,7 @@ def train():

     # preprocessing the given data for the model to train on
     for i in range(0, number_of_editors):
-        print(int(editor_accounts[str(i)]['verdict']))
+        # print(int(editor_accounts[str(i)]['verdict']))
         preprocess_data[i] = preprocess_editor(editor_accounts[str(i)], int(editor_accounts[str(i)]['verdict']))

     # retraining the model with new data
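Given the route above, a round trip now sends an index-keyed dictionary of accounts and gets back one prediction per editor id. A rough sketch of the exchange (the account fields are abbreviated and the result format is illustrative; see classify_request.py below for a complete payload):

import requests

KERAS_REST_API_URL = "http://localhost:4321/predict"

# accounts keyed by stringified indices "0", "1", ..., since the route reads
# editor_accounts[str(i)]; fields are abbreviated here for illustration
payload = {
    "0": {"id": 1, "area": "", "bio": "spam text here"},
    "1": {"id": 2, "area": None, "bio": None},
}

r = requests.post(KERAS_REST_API_URL, json=payload).json()

# expected shape on success (values illustrative):
# {"predictions": {"1": <result>, "2": <result>}, "success": true}
if r.get("success"):
    for editor_id, result in r["predictions"].items():
        print(editor_id, result)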

spambrainz/app/train.py

Lines changed: 1 addition & 1 deletion
@@ -69,7 +69,7 @@ def retrain_model(training_data):

     # set the model optimizer learning rate to a smaller static value to
     # avoid catastrophic forgetting
-    m.optimizer.lr = 0.01
+    m.optimizer.lr = 0.001

     # saving the previous weights before training for future reference
     m.save('static/models/weights/previous_lodbrok.h5')
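One caveat worth flagging on this change: depending on the Keras/TensorFlow version in use, assigning to m.optimizer.lr directly may not update the optimizer's underlying variable. A more defensive sketch, assuming a tf.keras backend (the weights path is taken from the repo):

from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model

# load the current weights, as retrain_model does
m = load_model('static/models/weights/current_lodbrok.h5')

# set_value writes the optimizer's learning-rate variable in place, which
# behaves consistently across tf.keras versions
K.set_value(m.optimizer.lr, 0.001)

# equivalently, with TF2-style optimizers the variable can be assigned directly:
# m.optimizer.learning_rate.assign(0.001)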

spambrainz/classify_request.py

Lines changed: 20 additions & 1 deletion
@@ -10,7 +10,9 @@
1010
KERAS_REST_API_URL = "http://localhost:4321/predict"
1111

1212
# set spam editor account details to classify by the model
13-
editor_account = {
13+
editor_account = {}
14+
15+
editor_account[0]= {
1416
'id' : 1,
1517
'email': '[email protected]',
1618
'website': 'http://www.kisaiya.co.uk',
@@ -26,6 +28,23 @@
2628

2729
}
2830

31+
# add non spam editor account details
32+
33+
editor_account[1] = {
34+
'id': 2,
35+
'email': '[email protected]',
36+
'website': 'http://valeur-dachat.fr',
37+
'bio': 'enhance card gift leisure boost transfer detail layer mechanic gauge tomato repair rather infant laptop document wool december retreat behave sunset innocent what spray cake mother cart fall smart essay lyrics you observe battle film raccoon garment boss cook prize dumb police define outer shuffle glad engage stool chair recall depart use material yellow next life shoe print luxury isolate elegant civil bullet argue genuine swear allow unfold fortune region glory hour rule ',
38+
'area': None,
39+
'privs': 0,
40+
'gender': None,
41+
'birth_date': None,
42+
'member_since': datetime.datetime(2004, 10, 13, 11, 3, 46, 5).strftime('%Y-%m-%dT%H:%M:%S'),
43+
'email_confirm_date': datetime.datetime(2004, 10, 14, 9, 38, 45, 5).strftime('%Y-%m-%dT%H:%M:%S'),
44+
'last_updated': None,
45+
'last_login_date': None
46+
}
47+
2948
# submit the request to classify the given data by lodbrok model to /predict endpoint
3049
r = requests.post(KERAS_REST_API_URL,json = editor_account).json()
3150

classify_request result image (binary file, 2.61 KB; preview not included)
