JetBrains-Research
diff --git a/‎.gitignore‎
Lines changed: 0 additions & 53 deletions b/‎.gitignore‎
Lines changed: 0 additions & 53 deletions
diff --git a/‎clone-detector/backup-gtpm.sh‎
Lines changed: 3 additions & 6 deletions b/‎clone-detector/backup-gtpm.sh‎
Lines changed: 3 additions & 6 deletions
diff --git a/‎clone-detector/cleanup.sh‎
Lines changed: 2 additions & 0 deletions b/‎clone-detector/cleanup.sh‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎clone-detector/controller.py‎
Lines changed: 77 additions & 131 deletions b/‎clone-detector/controller.py‎
Lines changed: 77 additions & 131 deletions
diff --git a/‎clone-detector/copy_properties.sh‎
Lines changed: 1 addition & 0 deletions b/‎clone-detector/copy_properties.sh‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎clone-detector/execute.sh‎
Lines changed: 5 additions & 6 deletions b/‎clone-detector/execute.sh‎
Lines changed: 5 additions & 6 deletions
diff --git a/‎clone-detector/mergeindexes-cygwin.sh‎
Lines changed: 6 additions & 8 deletions b/‎clone-detector/mergeindexes-cygwin.sh‎
Lines changed: 6 additions & 8 deletions
diff --git a/‎clone-detector/mergeindexes.sh‎
Lines changed: 4 additions & 7 deletions b/‎clone-detector/mergeindexes.sh‎
Lines changed: 4 additions & 7 deletions
@@ -1,17 +1,14 @@
 #!/bin/bash
-#
-#
 
 realpath() {
     [[ $1 = /* ]] && echo "$1" || echo "$PWD/${1#./}"
 }
 scriptPATH=$(realpath "$0")
-rootPATH=`dirname $scriptPATH`
-echo "backing up gtpm indexes..."
+rootPATH=$(dirname "$scriptPATH")  # quoted so a script path containing spaces is passed as one argument
+printf "\e[32m[backup-gtpm.sh] \e[0mbacking up gtpm indexes...\n"
 rm -rf $rootPATH/backup_gtpm
 mkdir $rootPATH/backup_gtpm
 cp -r $rootPATH/gtpmindex $rootPATH/backup_gtpm
 
-echo "gtpmindex backup created "
-
+printf "\e[32m[backup-gtpm.sh] \e[0mgtpmindex backup created\n"
 
@@ -1,4 +1,6 @@
 #!/bin/bash
+
+printf "\e[32m[cleanup.sh]\e[0m\n"
 rm Log_*
 rm -rf *index
 rm -rf input/dataset/oldData
 
@@ -3,165 +3,111 @@
 import sys
 import os
 
+# exit codes
+EXIT_SUCCESS = 0
+EXIT_FAILURE = 1
+# states
+STATE_EXECUTE_1 = 0
+STATE_INIT = 1
+STATE_INDEX = 2
+STATE_MOVE_INDEX = 3
+STATE_EXECUTE_2 = 4
+STATE_SEARCH = 5
+
 class ScriptControllerException(Exception):
     pass
 
 # Aim of this class is to run the scripts for SourcererCC with a single command
 class ScriptController(object):
-    # exit codes
-    EXIT_SUCCESS = 0
-    EXIT_FAILURE = 1
-    # states
-    STATE_EXECUTE_1 = 0
-    STATE_INIT = 1
-    STATE_INDEX = 2
-    STATE_MOVE_INDEX = 3
-    STATE_EXECUTE_2 = 4
-    STATE_SEARCH = 5
-
-    def __init__(self, params):
-        self.params = {}
-        self.params.update(params)
+    def __init__(self, num_nodes):
+        self.num_nodes_search = num_nodes
         self.script_meta_file_name = self.full_file_path("scriptinator_metadata.scc")
-        self.current_state = ScriptController.STATE_EXECUTE_1  # default state
+        self.current_state = STATE_EXECUTE_1  # default state
         self.previous_run_state = self.load_previous_state()
 
     def full_file_path(self,string):
-        return os.path.join(os.path.dirname(os.path.realpath(__file__)),string)
+        return os.path.join(os.path.dirname(os.path.realpath(__file__)), string)
 
-    def full_script_path(self,string,param=""):
-        res = os.path.join(os.path.dirname(os.path.realpath(__file__)),string)
-        if len(param) == 0:
-            return res
-        else:
-            return res + " " + param
+    def full_script_path(self,string,param = ""):
+        res = self.full_file_path(string)
+        if param != "":
+            res += " " + param
+        return res
 
     def execute(self):
-        # execute command
-        print("previous run state {s}".format(s=self.previous_run_state))
-        if self.previous_run_state > ScriptController.STATE_EXECUTE_1:
-            returncode = ScriptController.EXIT_SUCCESS
+        print("previous run state {}".format(self.previous_run_state))
+        if self.previous_run_state > STATE_EXECUTE_1:
+            returncode = EXIT_SUCCESS
         else:
-            command = self.full_script_path('execute.sh', "1")
-            command_params = command.split()
-            returncode = self.run_command(
-                command_params, self.full_file_path("Log_execute_1.out"), self.full_file_path("Log_execute_1.err"))
+            returncode = self.run_command_wrapper("execute.sh", "1")
         self.current_state += 1
-        if returncode == ScriptController.EXIT_SUCCESS:
-            self.flush_state()
-            # execute the init command
-            if self.previous_run_state > ScriptController.STATE_INIT:
-                returncode = ScriptController.EXIT_SUCCESS
-            else:
-                if self.previous_run_state == ScriptController.STATE_INIT:
-                    # last time the execution failed at init step. We need to replace the existing gtpm index  from the backup
-                    command = self.full_script_path("restore-gtpm.sh")
-                    command_params = command.split()
-                    returncode = self.run_command(
-                    command_params, self.full_file_path("Log_restore_gtpm.out"), self.full_file_path("Log_restore_gtpm.err"))
-                else:
-                    # take backup of existing gtpmindex before starting init
-                    command = self.full_script_path("backup-gtpm.sh")
-                    command_params = command.split()
-                    returncode = self.run_command(
-                    command_params, self.full_file_path("Log_backup_gtpm.out"), self.full_file_path("Log_backup_gtpm.err"))
-                # run the init step
-                command = self.full_script_path("runnodes.sh", "init 1")
-                command_params = command.split()
-                returncode = self.run_command(
-                    command_params, self.full_file_path("Log_init.out"), self.full_file_path("Log_init.err"))
-            self.current_state += 1
-            if returncode == ScriptController.EXIT_SUCCESS:
-                self.flush_state()
-                # execute index
-                if self.previous_run_state > ScriptController.STATE_INDEX:
-                    returncode = ScriptController.EXIT_SUCCESS
-                else:
-                    command = self.full_script_path("runnodes.sh", "index 1")
-                    command_params = command.split()
-                    returncode = self.run_command(
-                        command_params, self.full_file_path("Log_index.out"), self.full_file_path("Log_index.err"))
-                self.current_state += 1
-                if returncode == ScriptController.EXIT_SUCCESS:
-                    self.flush_state()
-                    if self.previous_run_state > ScriptController.STATE_MOVE_INDEX:
-                        returncode = ScriptController.EXIT_SUCCESS
-                    else:
-                        # execute move indexes
-                        command = self.full_script_path("move-index.sh")
-                        command_params = command.split()
-                        returncode = self.run_command(
-                            command_params, self.full_file_path("Log_move_index.out"), self.full_file_path("Log_move_index.err"))
-                    self.current_state += 1
-                    if returncode == ScriptController.EXIT_SUCCESS:
-                        self.flush_state()
-                        if self.previous_run_state > ScriptController.STATE_EXECUTE_2:
-                            returncode = ScriptController.EXIT_SUCCESS
-                            # execute command to create the dir structure
-                        else:
-                            command = self.full_script_path("execute.sh", "{nodes}".format(
-                                nodes=self.params["num_nodes_search"]))
-                            command_params = command.split()
-                            returncode = self.run_command(command_params, self.full_file_path("Log_execute_{nodes}.out".format(nodes=self.params["num_nodes_search"])), self.full_file_path("Log_execute_{nodes}.err".format(nodes=self.params["num_nodes_search"])))
-                        self.current_state += 1
-                        if returncode == ScriptController.EXIT_SUCCESS:
-                            self.flush_state()
-                            if self.previous_run_state > ScriptController.STATE_SEARCH:
-                                returncode = ScriptController.EXIT_SUCCESS
-                            else:
-                                command = self.full_script_path("runnodes.sh", "search {nodes}".format(
-                                    nodes=self.params["num_nodes_search"]))
-                                command_params = command.split()
-                                returncode = self.run_command(
-                                    command_params, self.full_file_path("Log_search.out"), self.full_file_path("Log_search.err"))
-                            self.current_state = ScriptController.STATE_EXECUTE_1 # go back to EXE 1 state
-                            if returncode == ScriptController.EXIT_SUCCESS:
-                                self.flush_state()
-                                print("SUCCESS: Search Completed on all nodes")
-                            else:
-                                raise ScriptControllerException("One or more nodes failed during Step Search. \
-                                    Check Log_search.log for more details. grep for FAILED in the log file")
-                        else:
-                            raise ScriptControllerException(
-                                "error in execute.sh script while preparing for the search step.")
-                    else:
-                        raise ScriptControllerException(
-                            "error in move-index.sh script.")
-                else:
-                    raise ScriptControllerException("error during indexing.")
-            else:
-                raise ScriptControllerException("error during init.")
+        self.flush_state()
+        # execute the init command
+        if self.previous_run_state > STATE_INIT:
+            returncode = EXIT_SUCCESS
         else:
-            raise ScriptControllerException(
-                "error in execute.sh script while preparing for init step.")
+            if self.previous_run_state == STATE_INIT:
+                # last time the execution failed at init step. We need to replace the existing gtpm index from the backup
+                returncode = self.run_command_wrapper("restore-gtpm.sh", "")
+            else:
+                # take backup of existing gtpmindex before starting init
+                returncode = self.run_command_wrapper("backup-gtpm.sh", "")
+            # run the init step
+            returncode = self.run_command_wrapper("runnodes.sh", "init 1")
+        self.current_state += 1
+
+        # execute index
+        returncode = self.perform_step(STATE_INDEX, "runnodes.sh", "index 1")
+        # execute move indexes
+        returncode = self.perform_step(STATE_MOVE_INDEX, "move-index.sh", "")
+        # execute command to create the dir structure
+        returncode = self.perform_step(STATE_EXECUTE_2, "execute.sh", "{}".format(self.num_nodes_search))
+        returncode = self.perform_step(STATE_SEARCH, "runnodes.sh", "search {}".format(self.num_nodes_search))
+        
+        self.current_state = STATE_EXECUTE_1 # go back to EXE 1 state
+        self.flush_state()  # persist the reset; flushing before it left the final state on disk and every later run skipped all steps
+        print("SUCCESS: Search Completed on all nodes")
+
+    def perform_step(self, state, cmd, params):
+        return_code = EXIT_SUCCESS
+        self.flush_state()
+        if self.previous_run_state <= state:
+            return_code = self.run_command_wrapper(cmd, params)
+        self.current_state += 1
+        return return_code
 
     def flush_state(self):
-        print("current state: ", str(self.current_state))
+        print ("flushing current state {}".format(self.current_state))
         with open(self.script_meta_file_name, "w") as f:
-            print ("flushing current state", str(self.current_state))
-            f.write("{line}\n".format(line=self.current_state))
+            f.write("{}\n".format(self.current_state))
 
     def load_previous_state(self):
         print("loading previous run state")
         if os.path.isfile(self.script_meta_file_name):
             with open(self.script_meta_file_name, "r") as f:
                 return int(f.readline())
         else:
-            print("{f} doesn't exist, creating one with state EXECUTE_1".format(f=self.script_meta_file_name))
-            return ScriptController.STATE_EXECUTE_1
+            print("{} doesn't exist, creating one with state EXECUTE_1".format(self.script_meta_file_name))
+            return STATE_EXECUTE_1
+    
+    def run_command_wrapper(self, cmd, params):  # run script `cmd` with space-separated `params`; raise ScriptControllerException on non-zero exit
+        command = [self.full_file_path(cmd)] + params.split()  # argv list: the script path stays one argument even if it contains spaces
+        return_code = self.run_command(command)
+        if return_code != EXIT_SUCCESS:
+            raise ScriptControllerException("error during executing {}".format(" ".join(command)))  # join the argv list, not the characters of the `cmd` string
+        return return_code
 
-    def run_command(self, cmd, outFile, errFile):
-        print("running new command {}".format(" ".join(cmd)))
-        with open(outFile, "w") as fo, \
-             open(errFile, "w") as fe:
-            p = subprocess.Popen(cmd, universal_newlines=True)
-            p.communicate()
+    def run_command(self, cmd):
+        print("running command {}".format(" ".join(cmd)))
+        p = subprocess.Popen(cmd, universal_newlines = True)
+        p.communicate()
         return p.returncode
 
 if __name__ == '__main__':
-    numnodes = (2 if len(sys.argv) <= 1 else int(sys.argv[1]))
-    print("search will be carried out with {num} nodes".format(num=numnodes))
-    params = {"num_nodes_search": numnodes}
+    numnodes = 2
+    if len(sys.argv) >= 2:
+        numnodes = int(sys.argv[1])
+    print("search will be carried out with {} nodes".format(numnodes))
 
-    controller = ScriptController(params)
+    controller = ScriptController(numnodes)
     controller.execute()
@@ -1,4 +1,5 @@
 #!/bin/bash
+
 num_nodes="${1:-0}"
 for i in $(seq 1 1 $num_nodes)
 do
 
@@ -4,17 +4,16 @@ realpath() {
     [[ $1 = /* ]] && echo "$1" || echo "$PWD/${1#./}"
 }
 scriptPATH=$(realpath "$0")
-rootPATH=`dirname $scriptPATH`
-echo $rootPATH
+rootPATH=$(dirname "$scriptPATH")  # quoted so a script path containing spaces is passed as one argument
+printf "\e[32m[execute.sh] \e[0m%s\n" "$rootPATH"  # path passed as an argument, never as part of the format string
 rm -rf $rootPATH/NODE*
 num_nodes="${1:-2}"
-#num_nodes=$((num_nodes-1))
 th="${2:-8}"
 queryfile="$rootPATH/input/dataset/blocks.file"
-echo "spliting query file $queryfile into $num_nodes parts"
+printf "\e[32m[execute.sh] \e[0mspliting query file %s into %s parts\n" "$queryfile" "$num_nodes"
 python $rootPATH/unevensplit.py $queryfile $num_nodes
-echo "moving files"
+printf "\e[32m[execute.sh] \e[0mmoving files\n"
 bash $rootPATH/preparequery.sh $num_nodes
-echo "done!"
+printf "\e[32m[execute.sh] \e[0mdone!\n"
 bash $rootPATH/replacenodeprefix.sh $num_nodes
 
@@ -1,13 +1,11 @@
 #!/bin/bash
-#
+
 # Run this script after indexing with several nodes
-#
 ant cdmerge
-echo "merging..."
-unixPATH=`pwd`
-echo $unixPATH
-p=`cygpath -aw $unixPATH/sourcerer-cc.properties`
-echo $p
+printf "\e[32m[mergeindexes-cygwin.sh] \e[0mmerging...\n"
+unixPATH=$(pwd)
+printf "\e[32m[mergeindexes-cygwin.sh] \e[0m%s\n" "$unixPATH"
+p=$(cygpath -aw "$unixPATH/sourcerer-cc.properties")
+printf "\e[32m[mergeindexes-cygwin.sh] \e[0m%s\n" "$p"  # Windows path: its backslashes must not be parsed as printf escapes
 java -Dproperties.location="$p" -Xms6g -Xmx6g -XX:+UseCompressedOops -jar dist/indexbased.IndexMerger.jar
 
-
@@ -1,12 +1,9 @@
 #!/bin/bash
-#
-# Run this script after indexing with several nodes
-#
 
+# Run this script after indexing with several nodes
 ant cdmerge
-echo "merging..."
-rootPATH=`pwd`
-echo $rootPATH
+printf "\e[32m[mergeindexes.sh] \e[0mmerging...\n"
+rootPATH=$(pwd)
+printf "\e[32m[mergeindexes.sh] \e[0m%s\n" "$rootPATH"
 java -Dproperties.location="$rootPATH/sourcerer-cc.properties" -Xms6g -Xmx6g -XX:+UseCompressedOops -jar dist/indexbased.IndexMerger.jar
 
-
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`#!/bin/bash`
	`2`	`+`
`2`	`3`	`num_nodes="${1:-0}"`
`3`	`4`	`for i in $(seq 1 1 $num_nodes)`
`4`	`5`	`do`