Skip to content

Commit 4015bb2

Browse files
committed
Reduce logging in tokenizer.py
1 parent 5d1165f commit 4015bb2

File tree

2 files changed

+3
-19
lines changed

2 files changed

+3
-19
lines changed

clone-detector/execute.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@ realpath() {
55
}
66
scriptPATH=$(realpath "$0")
77
rootPATH=$(dirname $scriptPATH)
8-
printf "\e[32m[execute.sh] \e[0m\n$rootPATH\n"
8+
printf "\e[32m[execute.sh] \e[0m$rootPATH\n"
99
rm -rf $rootPATH/NODE*
1010
num_nodes="${1:-2}"
1111
th="${2:-8}"

tokenizers/block-level/tokenizer.py

Lines changed: 2 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -313,15 +313,11 @@ def process_file_contents(file_string, proj_id, file_id, container_path, file_pa
313313
(file_hash,lines,LOC,SLOC) = final_stats
314314
file_url = proj_url + '/' + file_path.replace(' ','%20')
315315
file_path = os.path.join(container_path, file_path)
316-
317-
logging.warning('Finished step1 on process_file_contents');
318-
316+
319317
# file stats start with a letter 'f'
320318
FILE_stats_file.write('f' + ','.join([proj_id,str(file_id),'\"'+file_path+'\"','\"'+file_url+'\"','\"'+file_hash+'\"',file_bytes,str(lines),str(LOC),str(SLOC)]) + '\n')
321319
blocks_data = zip(range(10000,99999),blocks_data)
322320

323-
logging.warning('Finished step2 on process_file_contents');
324-
325321
ww_time = dt.datetime.now()
326322

327323
try:
@@ -365,8 +361,6 @@ def process_file_contents(file_string, proj_id, file_id, container_path, file_pa
365361
def process_regular_folder(process_num, zip_file, proj_id, proj_path, proj_url, base_file_id, FILE_tokens_file, FILE_bookkeeping_proj, FILE_stats_file, logging):
366362
zip_time = file_time = string_time = tokens_time = hash_time = write_time = regex_time = 0
367363

368-
logging.info('Attempting to process_regular_folder '+proj_path)
369-
370364
result = [f for dp, dn, filenames in os.walk(proj_path) for f in filenames if (os.path.splitext(f)[1] in file_extensions)]
371365

372366
for file_path in result:
@@ -413,8 +407,6 @@ def process_regular_folder(process_num, zip_file, proj_id, proj_path, proj_url,
413407
write_time += times[4]
414408
hash_time += times[2]
415409
regex_time += times[3]
416-
417-
logging.info('Successfully ran process_regular_folder '+zip_file)
418410
return (zip_time, file_time, string_time, tokens_time, write_time, hash_time, regex_time)
419411

420412
def process_tgz_ball(process_num, tar_file, proj_id, proj_path, proj_url, base_file_id, FILE_tokens_file, FILE_bookkeeping_proj, FILE_stats_file, logging):
@@ -527,9 +519,6 @@ def process_one_project(process_num, proj_id, proj_path, base_file_id, FILE_toke
527519

528520
if project_format == 'leidos':
529521
proj_path, proj_url = proj_path
530-
531-
logging.info('Starting leidos project <'+proj_id+','+proj_path+'> (process '+str(process_num)+')')
532-
533522
if not os.path.isdir(proj_path):
534523
logging.warning('Unable to open project <'+proj_id+','+proj_path+'> (process '+str(process_num)+')')
535524
return
@@ -560,8 +549,6 @@ def process_one_project(process_num, proj_id, proj_path, base_file_id, FILE_toke
560549

561550
proj_id = str(proj_id_flag) + proj_id
562551

563-
logging.info('Starting zip project <'+proj_id+','+proj_path+'> (process '+str(process_num)+')')
564-
565552
if not os.path.isfile(proj_path):
566553
logging.warning('Unable to open project <'+proj_id+','+proj_path+'> (process '+str(process_num)+')')
567554
return
@@ -581,8 +568,6 @@ def process_one_project(process_num, proj_id, proj_path, base_file_id, FILE_toke
581568

582569
proj_id = str(proj_id_flag) + proj_id
583570

584-
logging.info('Starting folder project <'+proj_id+','+proj_path+'> (process '+str(process_num)+')')
585-
586571
if not os.path.exists(proj_path):
587572
logging.warning('Unable to open project <'+proj_id+','+proj_path+'> (process '+str(process_num)+')')
588573
return
@@ -635,8 +620,7 @@ def process_projects(process_num, list_projects, base_file_id, global_queue, pro
635620
FILE_tokens_file, FILE_bookkeeping_proj, FILE_stats_file, logging, project_format)
636621

637622
p_elapsed = (dt.datetime.now() - p_start).seconds
638-
logging.info('Process %s finished. %s files in %ss.',
639-
process_num, file_count, p_elapsed)
623+
logging.info('Process %s finished. %s files in %ss.', process_num, file_count, p_elapsed)
640624

641625
# Let parent know
642626
global_queue.put((process_num, file_count))

0 commit comments

Comments
 (0)