Add a `plotting` config flag so matplotlib/seaborn are imported and used only when enabled (refs #15)

greninja · greninja · commit 709f52f6cdfd · 2017-01-16T03:57:22.000+05:30
diff --git a/conf/cuckooml.conf b/conf/cuckooml.conf
@@ -38,3 +38,6 @@ compare_new_samples = true
 
 # Set folder for samples to be compared against clustering
 test_directory = sample_data/test
+
+# Enable plotting? (requires matplotlib and seaborn)
+plotting = true
diff --git a/modules/processing/cuckooml.py b/modules/processing/cuckooml.py
@@ -15,11 +15,19 @@
 from lib.cuckoo.common.constants import CUCKOO_ROOT
 from math import log
 
+if Config("cuckooml").cuckooml.plotting:
+    try:
+        import matplotlib.pyplot as plt
+        import seaborn as sns
+    except ImportError, e:
+        print >> sys.stderr, "Plotting libraries \
+            (matplotlib and seaborn) are not available."
+        print >> sys.stderr, e
+
+        
 try:
-    import matplotlib.pyplot as plt
     import numpy as np
     import pandas as pd
-    import seaborn as sns
     from hdbscan import HDBSCAN
     from sklearn import metrics
     from sklearn.cluster import DBSCAN
@@ -797,6 +805,14 @@ def filter_dataset(self, dataset=None, feature_coverage=0.1,
 
     def detect_abnormal_behaviour(self, count_dataset=None, figures=True):
         """Detect samples that behave significantly different than others."""
+        
+        # Safety check for plotting
+        if not Config("cuckooml").cuckooml.plotting and figures:
+            print >> sys.stderr, "Warning:'plotting' flag disabled in config file, \
+                                  'figures' flag will be overwritten."
+            figures = False
+
+
         if count_dataset is None:
             # Pull all count features
             count_features = self.feature_category(":count:")
@@ -1133,6 +1149,14 @@ def performance_metric(clustering, labels, data, noise):
 
     def clustering_label_distribution(self, clustering, labels, plot=False):
         """Get statistics about number of ground truth labels per cluster."""
+        
+        # Safety check for plotting
+        if not Config("cuckooml").cuckooml.plotting and plot:
+            print >> sys.stderr, "Warning:'plotting' flag disabled in config file, \
+                                  'plot' flag will be overwritten."
+            plot = False
+
+
         cluster_ids = set(clustering["label"].tolist())
         labels_ids = set(labels["label"].tolist())
         cluster_distribution = {}