Skip to content

Commit 6223a22

Browse files
authored
Merge pull request #7 from rprtr258/refactor
Refactor
2 parents ad8cfba + 4015bb2 commit 6223a22

30 files changed

+335
-654
lines changed

.gitignore

Lines changed: 0 additions & 53 deletions
This file was deleted.

clone-detector/backup-gtpm.sh

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,14 @@
11
#!/bin/bash
2-
#
3-
#
42

53
realpath() {
64
[[ $1 = /* ]] && echo "$1" || echo "$PWD/${1#./}"
75
}
86
scriptPATH=$(realpath "$0")
9-
rootPATH=`dirname $scriptPATH`
10-
echo "backing up gtpm indexes..."
7+
rootPATH=$(dirname $scriptPATH)
8+
printf "\e[32m[backup-gtpm.sh] \e[0mbacking up gtpm indexes...\n"
119
rm -rf $rootPATH/backup_gtpm
1210
mkdir $rootPATH/backup_gtpm
1311
cp -r $rootPATH/gtpmindex $rootPATH/backup_gtpm
1412

15-
echo "gtpmindex backup created "
16-
13+
printf "\e[32m[backup-gtpm.sh] \e[0mgtpmindex backup created\n"
1714

clone-detector/cleanup.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
#!/bin/bash
2+
3+
printf "\e[32m[cleanup.sh]\e[0m\n"
24
rm Log_*
35
rm -rf *index
46
rm -rf input/dataset/oldData

clone-detector/controller.py

Lines changed: 77 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -3,165 +3,111 @@
33
import sys
44
import os
55

6+
# exit codes
7+
EXIT_SUCCESS = 0
8+
EXIT_FAILURE = 1
9+
# states
10+
STATE_EXECUTE_1 = 0
11+
STATE_INIT = 1
12+
STATE_INDEX = 2
13+
STATE_MOVE_INDEX = 3
14+
STATE_EXECUTE_2 = 4
15+
STATE_SEARCH = 5
16+
617
class ScriptControllerException(Exception):
718
pass
819

920
# Aim of this class is to run the scripts for SourcererCC with a single command
1021
class ScriptController(object):
11-
# exit codes
12-
EXIT_SUCCESS = 0
13-
EXIT_FAILURE = 1
14-
# states
15-
STATE_EXECUTE_1 = 0
16-
STATE_INIT = 1
17-
STATE_INDEX = 2
18-
STATE_MOVE_INDEX = 3
19-
STATE_EXECUTE_2 = 4
20-
STATE_SEARCH = 5
21-
22-
def __init__(self, params):
23-
self.params = {}
24-
self.params.update(params)
22+
def __init__(self, num_nodes):
23+
self.num_nodes_search = num_nodes
2524
self.script_meta_file_name = self.full_file_path("scriptinator_metadata.scc")
26-
self.current_state = ScriptController.STATE_EXECUTE_1 # default state
25+
self.current_state = STATE_EXECUTE_1 # default state
2726
self.previous_run_state = self.load_previous_state()
2827

2928
def full_file_path(self,string):
30-
return os.path.join(os.path.dirname(os.path.realpath(__file__)),string)
29+
return os.path.join(os.path.dirname(os.path.realpath(__file__)), string)
3130

32-
def full_script_path(self,string,param=""):
33-
res = os.path.join(os.path.dirname(os.path.realpath(__file__)),string)
34-
if len(param) == 0:
35-
return res
36-
else:
37-
return res + " " + param
31+
def full_script_path(self,string,param = ""):
32+
res = self.full_file_path(string)
33+
if param != "":
34+
res += " " + param
35+
return res
3836

3937
def execute(self):
40-
# execute command
41-
print("previous run state {s}".format(s=self.previous_run_state))
42-
if self.previous_run_state > ScriptController.STATE_EXECUTE_1:
43-
returncode = ScriptController.EXIT_SUCCESS
38+
print("previous run state {}".format(self.previous_run_state))
39+
if self.previous_run_state > STATE_EXECUTE_1:
40+
returncode = EXIT_SUCCESS
4441
else:
45-
command = self.full_script_path('execute.sh', "1")
46-
command_params = command.split()
47-
returncode = self.run_command(
48-
command_params, self.full_file_path("Log_execute_1.out"), self.full_file_path("Log_execute_1.err"))
42+
returncode = self.run_command_wrapper("execute.sh", "1")
4943
self.current_state += 1
50-
if returncode == ScriptController.EXIT_SUCCESS:
51-
self.flush_state()
52-
# execute the init command
53-
if self.previous_run_state > ScriptController.STATE_INIT:
54-
returncode = ScriptController.EXIT_SUCCESS
55-
else:
56-
if self.previous_run_state == ScriptController.STATE_INIT:
57-
# last time the execution failed at init step. We need to replace the existing gtpm index from the backup
58-
command = self.full_script_path("restore-gtpm.sh")
59-
command_params = command.split()
60-
returncode = self.run_command(
61-
command_params, self.full_file_path("Log_restore_gtpm.out"), self.full_file_path("Log_restore_gtpm.err"))
62-
else:
63-
# take backup of existing gtpmindex before starting init
64-
command = self.full_script_path("backup-gtpm.sh")
65-
command_params = command.split()
66-
returncode = self.run_command(
67-
command_params, self.full_file_path("Log_backup_gtpm.out"), self.full_file_path("Log_backup_gtpm.err"))
68-
# run the init step
69-
command = self.full_script_path("runnodes.sh", "init 1")
70-
command_params = command.split()
71-
returncode = self.run_command(
72-
command_params, self.full_file_path("Log_init.out"), self.full_file_path("Log_init.err"))
73-
self.current_state += 1
74-
if returncode == ScriptController.EXIT_SUCCESS:
75-
self.flush_state()
76-
# execute index
77-
if self.previous_run_state > ScriptController.STATE_INDEX:
78-
returncode = ScriptController.EXIT_SUCCESS
79-
else:
80-
command = self.full_script_path("runnodes.sh", "index 1")
81-
command_params = command.split()
82-
returncode = self.run_command(
83-
command_params, self.full_file_path("Log_index.out"), self.full_file_path("Log_index.err"))
84-
self.current_state += 1
85-
if returncode == ScriptController.EXIT_SUCCESS:
86-
self.flush_state()
87-
if self.previous_run_state > ScriptController.STATE_MOVE_INDEX:
88-
returncode = ScriptController.EXIT_SUCCESS
89-
else:
90-
# execute move indexes
91-
command = self.full_script_path("move-index.sh")
92-
command_params = command.split()
93-
returncode = self.run_command(
94-
command_params, self.full_file_path("Log_move_index.out"), self.full_file_path("Log_move_index.err"))
95-
self.current_state += 1
96-
if returncode == ScriptController.EXIT_SUCCESS:
97-
self.flush_state()
98-
if self.previous_run_state > ScriptController.STATE_EXECUTE_2:
99-
returncode = ScriptController.EXIT_SUCCESS
100-
# execute command to create the dir structure
101-
else:
102-
command = self.full_script_path("execute.sh", "{nodes}".format(
103-
nodes=self.params["num_nodes_search"]))
104-
command_params = command.split()
105-
returncode = self.run_command(command_params, self.full_file_path("Log_execute_{nodes}.out".format(nodes=self.params["num_nodes_search"])), self.full_file_path("Log_execute_{nodes}.err".format(nodes=self.params["num_nodes_search"])))
106-
self.current_state += 1
107-
if returncode == ScriptController.EXIT_SUCCESS:
108-
self.flush_state()
109-
if self.previous_run_state > ScriptController.STATE_SEARCH:
110-
returncode = ScriptController.EXIT_SUCCESS
111-
else:
112-
command = self.full_script_path("runnodes.sh", "search {nodes}".format(
113-
nodes=self.params["num_nodes_search"]))
114-
command_params = command.split()
115-
returncode = self.run_command(
116-
command_params, self.full_file_path("Log_search.out"), self.full_file_path("Log_search.err"))
117-
self.current_state = ScriptController.STATE_EXECUTE_1 # go back to EXE 1 state
118-
if returncode == ScriptController.EXIT_SUCCESS:
119-
self.flush_state()
120-
print("SUCCESS: Search Completed on all nodes")
121-
else:
122-
raise ScriptControllerException("One or more nodes failed during Step Search. \
123-
Check Log_search.log for more details. grep for FAILED in the log file")
124-
else:
125-
raise ScriptControllerException(
126-
"error in execute.sh script while preparing for the search step.")
127-
else:
128-
raise ScriptControllerException(
129-
"error in move-index.sh script.")
130-
else:
131-
raise ScriptControllerException("error during indexing.")
132-
else:
133-
raise ScriptControllerException("error during init.")
44+
self.flush_state()
45+
# execute the init command
46+
if self.previous_run_state > STATE_INIT:
47+
returncode = EXIT_SUCCESS
13448
else:
135-
raise ScriptControllerException(
136-
"error in execute.sh script while preparing for init step.")
49+
if self.previous_run_state == STATE_INIT:
50+
# last time the execution failed at init step. We need to replace the existing gtpm index from the backup
51+
returncode = self.run_command_wrapper("restore-gtpm.sh", "")
52+
else:
53+
# take backup of existing gtpmindex before starting init
54+
returncode = self.run_command_wrapper("backup-gtpm.sh", "")
55+
# run the init step
56+
returncode = self.run_command_wrapper("runnodes.sh", "init 1")
57+
self.current_state += 1
58+
59+
# execute index
60+
returncode = self.perform_step(STATE_INDEX, "runnodes.sh", "index 1")
61+
# execute move indexes
62+
returncode = self.perform_step(STATE_MOVE_INDEX, "move-index.sh", "")
63+
# execute command to create the dir structure
64+
returncode = self.perform_step(STATE_EXECUTE_2, "execute.sh", "{}".format(self.num_nodes_search))
65+
returncode = self.perform_step(STATE_SEARCH, "runnodes.sh", "search {}".format(self.num_nodes_search))
66+
67+
self.flush_state()
68+
self.current_state = STATE_EXECUTE_1 # go back to EXE 1 state
69+
print("SUCCESS: Search Completed on all nodes")
70+
71+
def perform_step(self, state, cmd, params):
72+
return_code = EXIT_SUCCESS
73+
self.flush_state()
74+
if self.previous_run_state <= state:
75+
return_code = self.run_command_wrapper(cmd, params)
76+
self.current_state += 1
77+
return return_code
13778

13879
def flush_state(self):
139-
print("current state: ", str(self.current_state))
80+
print ("flushing current state {}".format(self.current_state))
14081
with open(self.script_meta_file_name, "w") as f:
141-
print ("flushing current state", str(self.current_state))
142-
f.write("{line}\n".format(line=self.current_state))
82+
f.write("{}\n".format(self.current_state))
14383

14484
def load_previous_state(self):
14585
print("loading previous run state")
14686
if os.path.isfile(self.script_meta_file_name):
14787
with open(self.script_meta_file_name, "r") as f:
14888
return int(f.readline())
14989
else:
150-
print("{f} doesn't exist, creating one with state EXECUTE_1".format(f=self.script_meta_file_name))
151-
return ScriptController.STATE_EXECUTE_1
90+
print("{} doesn't exist, creating one with state EXECUTE_1".format(self.script_meta_file_name))
91+
return STATE_EXECUTE_1
92+
93+
def run_command_wrapper(self, cmd, params):
94+
command = self.full_script_path(cmd, params)
95+
return_code = self.run_command(command.split())
96+
if return_code != EXIT_SUCCESS:
97+
raise ScriptControllerException("error during executing {}".format(" ".join(cmd)))
98+
return return_code
15299

153-
def run_command(self, cmd, outFile, errFile):
154-
print("running new command {}".format(" ".join(cmd)))
155-
with open(outFile, "w") as fo, \
156-
open(errFile, "w") as fe:
157-
p = subprocess.Popen(cmd, universal_newlines=True)
158-
p.communicate()
100+
def run_command(self, cmd):
101+
print("running command {}".format(" ".join(cmd)))
102+
p = subprocess.Popen(cmd, universal_newlines = True)
103+
p.communicate()
159104
return p.returncode
160105

161106
if __name__ == '__main__':
162-
numnodes = (2 if len(sys.argv) <= 1 else int(sys.argv[1]))
163-
print("search will be carried out with {num} nodes".format(num=numnodes))
164-
params = {"num_nodes_search": numnodes}
107+
numnodes = 2
108+
if len(sys.argv) >= 2:
109+
numnodes = int(sys.argv[1])
110+
print("search will be carried out with {} nodes".format(numnodes))
165111

166-
controller = ScriptController(params)
112+
controller = ScriptController(numnodes)
167113
controller.execute()

clone-detector/copy_properties.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#!/bin/bash
2+
23
num_nodes="${1:-0}"
34
for i in $(seq 1 1 $num_nodes)
45
do

clone-detector/execute.sh

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,16 @@ realpath() {
44
[[ $1 = /* ]] && echo "$1" || echo "$PWD/${1#./}"
55
}
66
scriptPATH=$(realpath "$0")
7-
rootPATH=`dirname $scriptPATH`
8-
echo $rootPATH
7+
rootPATH=$(dirname $scriptPATH)
8+
printf "\e[32m[execute.sh] \e[0m$rootPATH\n"
99
rm -rf $rootPATH/NODE*
1010
num_nodes="${1:-2}"
11-
#num_nodes=$((num_nodes-1))
1211
th="${2:-8}"
1312
queryfile="$rootPATH/input/dataset/blocks.file"
14-
echo "spliting query file $queryfile into $num_nodes parts"
13+
printf "\e[32m[execute.sh] \e[0mspliting query file $queryfile into $num_nodes parts\n"
1514
python $rootPATH/unevensplit.py $queryfile $num_nodes
16-
echo "moving files"
15+
printf "\e[32m[execute.sh] \e[0mmoving files\n"
1716
bash $rootPATH/preparequery.sh $num_nodes
18-
echo "done!"
17+
printf "\e[32m[execute.sh] \e[0mdone!\n"
1918
bash $rootPATH/replacenodeprefix.sh $num_nodes
2019

clone-detector/mergeindexes-cygwin.sh

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
#!/bin/bash
2-
#
2+
33
# Run this script after indexing with several nodes
4-
#
54
ant cdmerge
6-
echo "merging..."
7-
unixPATH=`pwd`
8-
echo $unixPATH
9-
p=`cygpath -aw $unixPATH/sourcerer-cc.properties`
10-
echo $p
5+
printf "\e[32m[mergeindexes-cygwin.sh] \e[0mmerging...\n"
6+
unixPATH=$(pwd)
7+
printf "\e[32m[mergeindexes-cygwin.sh] \e[0m$unixPATH\n"
8+
p=$(cygpath -aw $unixPATH/sourcerer-cc.properties)
9+
printf "\e[32m[mergeindexes-cygwin.sh] \e[0m$p\n"
1110
java -Dproperties.location="$p" -Xms6g -Xmx6g -XX:+UseCompressedOops -jar dist/indexbased.IndexMerger.jar
1211

13-

clone-detector/mergeindexes.sh

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,9 @@
11
#!/bin/bash
2-
#
3-
# Run this script after indexing with several nodes
4-
#
52

3+
# Run this script after indexing with several nodes
64
ant cdmerge
7-
echo "merging..."
8-
rootPATH=`pwd`
9-
echo $rootPATH
5+
printf "\e[32m[mergeindexes.sh] \e[0mmerging...\n"
6+
rootPATH=$(pwd)
7+
printf "\e[32m[mergeindexes.sh] \e[0m$rootPATH\n"
108
java -Dproperties.location="$rootPATH/sourcerer-cc.properties" -Xms6g -Xmx6g -XX:+UseCompressedOops -jar dist/indexbased.IndexMerger.jar
119

12-

0 commit comments

Comments
 (0)