11#!/usr/bin/env python
22
3- import sys , os , subprocess , inspect , multiprocessing , shutil , argparse , time , re , platform
3+ import sys , os , subprocess , inspect , shutil , argparse , re
44from Bio import SeqIO
55currentdir = os .path .dirname (os .path .abspath (inspect .getfile (inspect .currentframe ())))
66parentdir = os .path .dirname (currentdir )
7- sys .path .insert (0 ,parentdir )
7+ sys .path .insert (0 , parentdir )
88import lib .library as lib
99
1010#setup menu with argparse
1111class MyFormatter (argparse .ArgumentDefaultsHelpFormatter ):
12- def __init__ (self ,prog ):
13- super (MyFormatter ,self ).__init__ (prog ,max_help_position = 48 )
14- parser = argparse .ArgumentParser (prog = 'funannotate-predict.py' , usage = "%(prog)s [options] -i genome.fasta" ,
15- description = '''Script that does it all.. .''' ,
16- epilog = """Written by Jon Palmer (2016) [email protected] """ ,
12+ def __init__ (self , prog ):
13+ super (MyFormatter , self ).__init__ (prog , max_help_position = 48 )
14+ parser = argparse .ArgumentParser (prog = 'funannotate-predict.py' , usage = "%(prog)s [options] -i genome.fasta" ,
15+ description = '''Script that does it all.''' ,
16+ epilog = """Written by Jon Palmer (2016) [email protected] """ ,
1717 formatter_class = MyFormatter )
18- parser .add_argument ('-i' ,'--input' , required = True , help = 'Genome in FASTA format' )
19- parser .add_argument ('-o' ,'--out' , required = True , help = 'Basename of output files' )
20- parser .add_argument ('-s' ,'--species' , required = True , help = 'Species name (e.g. "Aspergillus fumigatus") use quotes if there is a space' )
18+ parser .add_argument ('-i' , '--input' , required = True , help = 'Genome in FASTA format' )
19+ parser .add_argument ('-o' , '--out' , required = True , help = 'Basename of output files' )
20+ parser .add_argument ('-s' , '--species' , required = True , help = 'Species name (e.g. "Aspergillus fumigatus") use quotes if there is a space' )
2121parser .add_argument ('--isolate' , help = 'Isolate/strain name (e.g. Af293)' )
2222parser .add_argument ('--header_length' , default = 16 , type = int , help = 'Max length for fasta headers' )
2323parser .add_argument ('--name' , default = "FUN_" , help = 'Shortname for genes, perhaps assigned by NCBI, eg. VC83' )
@@ -52,7 +52,7 @@ def __init__(self,prog):
5252conflict = ['busco' , 'busco_proteins' , 'RepeatMasker' , 'RepeatModeler' , 'genemark' , 'EVM_tmp' , 'braker' ]
5353if args .out in conflict :
5454 lib .log .error ("%s output folder conflicts with a hard coded tmp folder, please change -o parameter" % args .out )
55- os . _exit (1 )
55+ sys . exit (1 )
5656
5757#create folder structure
5858if not os .path .exists (args .out ):
@@ -87,19 +87,18 @@ def __init__(self,prog):
8787blastdb = os .path .join (parentdir ,'DB' ,'REPEATS.psq' )
8888if not os .path .isfile (blastdb ):
8989 lib .log .error ("funannotate database is not properly configured, please run `./setup.sh` in the %s directory" % parentdir )
90- os . _exit (1 )
90+ sys . exit (1 )
9191#check buscos, download if necessary
9292if not os .path .isdir (os .path .join (parentdir , 'DB' , args .busco_db )):
9393 lib .download_buscos (args .busco_db )
9494
95-
9695#do some checks and balances
9796try :
9897 EVM = os .environ ["EVM_HOME" ]
9998except KeyError :
10099 if not args .EVM_HOME :
101100 lib .log .error ("$EVM_HOME environmental variable not found, Evidence Modeler is not properly configured. You can use the --EVM_HOME argument to specifiy a path at runtime" )
102- os . _exit (1 )
101+ sys . exit (1 )
103102 else :
104103 EVM = args .EVM_HOME
105104
@@ -108,7 +107,7 @@ def __init__(self,prog):
108107except KeyError :
109108 if not args .AUGUSTUS_CONFIG_PATH :
110109 lib .log .error ("$AUGUSTUS_CONFIG_PATH environmental variable not found, Augustus is not properly configured. You can use the --AUGUSTUS_CONFIG_PATH argument to specify a path at runtime." )
111- os . _exit (1 )
110+ sys . exit (1 )
112111 else :
113112 AUGUSTUS = args .AUGUSTUS_CONFIG_PATH
114113
@@ -119,7 +118,7 @@ def __init__(self,prog):
119118 if not lib .which ('gmes_petap.pl' ):
120119 if not args .GENEMARK_PATH :
121120 lib .log .error ("GeneMark not found and $GENEMARK_PATH environmental variable missing, BRAKER1 is not properly configured. You can use the --GENEMARK_PATH argument to specify a path at runtime." )
122- os . _exit (1 )
121+ sys . exit (1 )
123122 else :
124123 GENEMARK_PATH = args .GENEMARK_PATH
125124
@@ -130,7 +129,7 @@ def __init__(self,prog):
130129 if not lib .which ('bamtools' ):
131130 if not args .BAMTOOLS_PATH :
132131 lib .log .error ("Bamtools not found and $BAMTOOLS_PATH environmental variable missing, BRAKER1 is not properly configured. You can use the --BAMTOOLS_PATH argument to specify a path at runtime." )
133- os . _exit (1 )
132+ sys . exit (1 )
134133 else :
135134 BAMTOOLS_PATH = args .BAMTOOLS_PATH
136135
@@ -141,7 +140,7 @@ def __init__(self,prog):
141140AutoAug = os .path .join (AUGUSTUS_BASE , 'scripts' , 'autoAug.pl' )
142141GeneMark2GFF = os .path .join (parentdir , 'util' , 'genemark_gtf2gff3.pl' )
143142
144- programs = ['tblastn' , 'exonerate' , 'makeblastdb' ,'dustmasker' ,'gag.py' ,'tbl2asn' ,'gmes_petap.pl' , 'BuildDatabase' , 'RepeatModeler' , 'RepeatMasker' , GeneMark2GFF , AutoAug , 'bedtools' , 'gmap' , 'gmap_build' , 'blat' , 'pslCDnaFilter' , 'augustus' , 'etraining' , 'rmOutToGFF3.pl' ]
143+ programs = ['tblastn' , 'exonerate' , 'makeblastdb' , 'dustmasker' , 'gag.py' , 'tbl2asn' , 'gmes_petap.pl' , 'BuildDatabase' , 'RepeatModeler' , 'RepeatMasker' , GeneMark2GFF , AutoAug , 'bedtools' , 'gmap' , 'gmap_build' , 'blat' , 'pslCDnaFilter' , 'augustus' , 'etraining' , 'rmOutToGFF3.pl' ]
145144lib .CheckDependencies (programs )
146145
147146#check augustus species now, so that you don't get through script and then find out it is already in DB
@@ -155,10 +154,11 @@ def __init__(self,prog):
155154
156155#check augustus functionality
157156augustuscheck = lib .checkAugustusFunc (AUGUSTUS_BASE )
157+ system_os = lib .systemOS ()
158158if args .rna_bam :
159159 if augustuscheck [1 ] == 0 :
160160 lib .log .error ("ERROR: %s is not installed properly for BRAKER1 (check bam2hints compilation)" % augustuscheck [0 ])
161- os . _exit (1 )
161+ sys . exit (1 )
162162if not augspeciescheck : #means training needs to be done
163163 if augustuscheck [2 ] == 0 :
164164 if 'MacOSX' in system_os :
@@ -170,7 +170,7 @@ def __init__(self,prog):
170170 else :
171171 lib .log .error ("ERROR: %s is not installed properly and this version not work with BUSCO, this is a problem with Augustus compliatation, you may need to compile manually on %s." % (augustuscheck [0 ], system_os ))
172172 if not args .pasa_gff : #first training will use pasa, otherwise BUSCO
173- os . _exit (1 )
173+ sys . exit (1 )
174174 else :
175175 lib .log .info ("Will proceed with PASA models to train Augustus" )
176176
@@ -204,7 +204,7 @@ def __init__(self,prog):
204204header_test = lib .checkFastaHeaders (args .input , args .header_length )
205205if not header_test :
206206 lib .log .error ("Fasta headers on your input have more characters than the max (%i), reformat headers to continue." % args .header_length ) #report the configured --header_length limit rather than a hard-coded 16
207- os . _exit (1 )
207+ sys . exit (1 )
208208
209209#setup augustus parallel command
210210AUGUSTUS_PARALELL = os .path .join (parentdir , 'bin' , 'augustus_parallel.py' )
@@ -248,7 +248,7 @@ def __init__(self,prog):
248248#check for masked genome here
249249if not os .path .isfile (MaskGenome ) or lib .getSize (MaskGenome ) < 10 :
250250 lib .log .error ("RepeatMasking failed, check log files." )
251- os . _exit (1 )
251+ sys . exit (1 )
252252
253253#load contig names and sizes into dictionary.
254254ContigSizes = {}
@@ -258,7 +258,7 @@ def __init__(self,prog):
258258 ContigSizes [rec .id ] = len (rec .seq )
259259 else :
260260 lib .log .error ("Error, duplicate contig names, exiting" )
261- os . _exit (1 )
261+ sys . exit (1 )
262262
263263#check for previous files and setup output files
264264Predictions = os .path .join (args .out , 'predict_misc' , 'gene_predictions.gff3' )
@@ -293,7 +293,7 @@ def __init__(self,prog):
293293 genesources .append (source )
294294 if not genesources :
295295 lib .log .error ("Maker2 GFF not parsed correctly, no gene models found, exiting." )
296- os . _exit (1 )
296+ sys . exit (1 )
297297 for i in genesources :
298298 if i == 'maker' :
299299 output .write ("ABINITIO_PREDICTION\t %s\t 1\n " % i )
@@ -373,7 +373,7 @@ def __init__(self,prog):
373373 #check for protein evidence/format as needed
374374 p2g_out = os .path .join (args .out , 'predict_misc' , 'exonerate.out' )
375375 prot_temp = os .path .join (args .out , 'predict_misc' , 'proteins.combined.fa' )
376- P2G = os .path .join (parentdir , 'bin' ,'funannotate-p2g.py' )
376+ P2G = os .path .join (parentdir , 'bin' , 'funannotate-p2g.py' )
377377 if not args .exonerate_proteins :
378378 if args .protein_evidence :
379379 if os .path .isfile (prot_temp ):
@@ -412,7 +412,7 @@ def __init__(self,prog):
412412 subprocess .call ([ExoConverter , exonerate_out ], stdout = output , stderr = FNULL )
413413 except OSError :
414414 lib .log .error ("$EVM_HOME variable is incorrect, please double-check: %s" % EVM )
415- os . _exit (1 )
415+ sys . exit (1 )
416416 Exonerate = os .path .abspath (Exonerate )
417417 #now run exonerate2 hints for Augustus
418418 exonerate2hints = os .path .join (AUGUSTUS_BASE , 'scripts' , 'exonerate2hints.pl' )
@@ -448,7 +448,7 @@ def __init__(self,prog):
448448 GeneMark = os .path .join (args .out , 'predict_misc' , 'genemark.evm.gff3' )
449449 with open (GeneMark , 'w' ) as output :
450450 with open (GeneMarkTemp , 'rU' ) as input :
451- lines = input .read ().replace ("Augustus" ,"GeneMark" )
451+ lines = input .read ().replace ("Augustus" , "GeneMark" )
452452 output .write (lines )
453453
454454 if args .augustus_gff :
@@ -556,7 +556,7 @@ def __init__(self,prog):
556556 GeneMark = os .path .join (args .out , 'predict_misc' , 'genemark.evm.gff3' )
557557 with open (GeneMark , 'w' ) as output :
558558 with open (GeneMarkTemp , 'rU' ) as input :
559- lines = input .read ().replace ("Augustus" ,"GeneMark" )
559+ lines = input .read ().replace ("Augustus" , "GeneMark" )
560560 output .write (lines )
561561 else : #have training parameters file, so just run genemark with
562562 GeneMarkGFF3 = os .path .join (args .out , 'predict_misc' , 'genemark.gff' )
@@ -586,7 +586,7 @@ def __init__(self,prog):
586586 subprocess .call (['perl' , Converter , GeneMarkTemp ], stdout = output , stderr = FNULL )
587587 with open (GeneMark , 'w' ) as output :
588588 with open (GeneMarkTemp2 , 'rU' ) as input :
589- lines = input .read ().replace ("Augustus" ,"GeneMark" )
589+ lines = input .read ().replace ("Augustus" , "GeneMark" )
590590 output .write (lines )
591591
592592 else :
@@ -602,7 +602,7 @@ def __init__(self,prog):
602602 GeneMark = os .path .join (args .out , 'predict_misc' , 'genemark.evm.gff3' )
603603 with open (GeneMark , 'w' ) as output :
604604 with open (GeneMarkTemp , 'rU' ) as input :
605- lines = input .read ().replace ("Augustus" ,"GeneMark" )
605+ lines = input .read ().replace ("Augustus" , "GeneMark" )
606606 output .write (lines )
607607
608608 if not Augustus :
@@ -644,7 +644,7 @@ def __init__(self,prog):
644644 lib .log .error ("BUSCO training of Augusus failed, check busco logs, exiting" )
645645 #remove the augustus training config folder
646646 shutil .rmtree (os .path .join (AUGUSTUS , 'species' , aug_species ))
647- os . _exit (1 )
647+ sys . exit (1 )
648648 #proper training files exist, now run EVM on busco models to get high quality predictions.
649649 lib .log .info ("BUSCO predictions complete, now formatting for EVM" )
650650 #move the busco folder now where it should reside
@@ -735,12 +735,12 @@ def __init__(self,prog):
735735 total = lib .countGFFgenes (EVM_busco )
736736 except IOError :
737737 lib .log .error ("EVM did not run correctly, output file missing" )
738- os . _exit (1 )
738+ sys . exit (1 )
739739 #check number of gene models, if 0 then failed, delete output file for re-running
740740 if total < 1 :
741741 lib .log .error ("Evidence modeler has failed, exiting" )
742742 os .remove (EVM_busco )
743- os . _exit (1 )
743+ sys . exit (1 )
744744 else :
745745 lib .log .info ('{0:,}' .format (total ) + ' total gene models from EVM' )
746746 #move EVM folder to predict folder
@@ -787,7 +787,7 @@ def __init__(self,prog):
787787 #just double-check that you've gotten here and both Augustus/GeneMark are finished
788788 if not all ([Augustus , GeneMark ]): #either prediction missing is an error; GeneMark is read unconditionally right after this check
789789 lib .log .error ("Augustus or GeneMark prediction is missing, check log files for errors" )
790- os . _exit (1 )
790+ sys . exit (1 )
791791
792792 #GeneMark can fail if you try to pass a single contig, check file length
793793 GM_check = lib .line_count (GeneMark )
@@ -902,12 +902,12 @@ def __init__(self,prog):
902902 total = lib .countGFFgenes (EVM_out )
903903except IOError :
904904 lib .log .error ("EVM did not run correctly, output file missing" )
905- os . _exit (1 )
905+ sys . exit (1 )
906906#check number of gene models, if 0 then failed, delete output file for re-running
907907if total < 1 :
908908 lib .log .error ("Evidence modeler has failed, exiting" )
909909 os .remove (EVM_out )
910- os . _exit (1 )
910+ sys . exit (1 )
911911else :
912912 lib .log .info ('{0:,}' .format (total ) + ' total gene models from EVM' )
913913
@@ -1037,4 +1037,4 @@ def __init__(self,prog):
10371037 os .rename ('funannotate-EVM.log' , os .path .join (args .out , 'logfiles' , 'funannotate-EVM.log' ))
10381038if os .path .isfile ('funannotate-p2g.log' ):
10391039 os .rename ('funannotate-p2g.log' , os .path .join (args .out , 'logfiles' , 'funannotate-p2g.log' ))
1040- os . _exit (1 )
1040+ sys . exit (0 ) #last statement of the script, reached after the log files are moved; exit status 0 so callers do not see a failure
0 commit comments