1+ #!/usr/bin/env python
2+
import argparse
import errno
import io
import os
import shutil
import socket
import subprocess
import sys
import urllib2
10+
11+ #setup menu with argparse
12+ class MyFormatter (argparse .ArgumentDefaultsHelpFormatter ):
13+ def __init__ (self ,prog ):
14+ super (MyFormatter ,self ).__init__ (prog ,max_help_position = 48 )
15+ parser = argparse .ArgumentParser (prog = 'funannotate-test.py' ,
16+ description = '''Script to download and then test funannotate installation''' ,
17+ epilog = """Written by Jon Palmer (2016-2018) [email protected] """ ,
18+ formatter_class = MyFormatter )
19+ parser .add_argument ('-t' ,'--tests' , required = True , nargs = '+' ,
20+ choices = ['all' , 'clean' , 'mask' , 'predict' , 'annotate' , 'busco' , 'rna-seq' , 'compare' ],
21+ help = 'select which tests to run' )
22+ parser .add_argument ('--cpus' , default = 2 , type = int , help = 'Number of CPUs to use' )
23+ args = parser .parse_args ()
24+
25+ download_links = {'mask' : 'https://osf.io/hbryz/download?version=1' ,
26+ 'clean' : 'https://osf.io/8pjbe/download?version=1' ,
27+ 'predict' : 'https://osf.io/te2pf/download?version=1' ,
28+ 'busco' : 'https://osf.io/kyrd9/download?version=1' ,
29+ 'rna-seq' : 'https://osf.io/t7j83/download?version=1' ,
30+ 'annotate' : 'https://osf.io/97pyn/download?version=1' ,
31+ 'compare' : 'https://osf.io/7s9xh/download?version=1' }
32+
33+ def checkFile (input ):
34+ def _getSize (filename ):
35+ st = os .stat (filename )
36+ return st .st_size
37+ if os .path .isfile (input ):
38+ filesize = _getSize (input )
39+ if int (filesize ) < 1 :
40+ return False
41+ else :
42+ return True
43+ elif os .path .islink (input ):
44+ return True
45+ else :
46+ return False
47+
48+ def countfasta (input ):
49+ count = 0
50+ with open (input , 'rU' ) as f :
51+ for line in f :
52+ if line .startswith (">" ):
53+ count += 1
54+ return count
55+
56+ def countGFFgenes (input ):
57+ count = 0
58+ with open (input , 'rU' ) as f :
59+ for line in f :
60+ if "\t gene\t " in line :
61+ count += 1
62+ return count
63+
64+ def runCMD (cmd , dir ):
65+ print ('CMD: {:}' .format (' ' .join (cmd )))
66+ print ("#########################################################" )
67+ subprocess .call (cmd , cwd = dir )
68+
69+ def download (url , name ):
70+ file_name = name
71+ try :
72+ u = urllib2 .urlopen (url )
73+ f = open (file_name , 'wb' )
74+ meta = u .info ()
75+ file_size = int (meta .getheaders ("Content-Length" )[0 ])
76+ print ("Downloading: {0} Bytes: {1}" .format (url , file_size ))
77+ file_size_dl = 0
78+ block_sz = 8192
79+ while True :
80+ buffer = u .read (block_sz )
81+ if not buffer :
82+ break
83+ file_size_dl += len (buffer )
84+ f .write (buffer )
85+ p = float (file_size_dl ) / file_size
86+ status = r"{0} [{1:.2%}]" .format (file_size_dl , p )
87+ status = status + chr (8 )* (len (status )+ 1 )
88+ sys .stdout .write (status )
89+ sys .stdout .flush ()
90+ f .close ()
91+ except socket .error as e :
92+ if e .errno != errno .ECONNRESET :
93+ raise
94+ pass
95+
96+ def runMaskTest ():
97+ print ("#########################################################" )
98+ print ('Running `funannotate mask` unit testing: RepeatModeler --> RepeatMasker' )
99+ tmpdir = 'test-mask_' + pid
100+ os .makedirs (tmpdir )
101+ inputFasta = 'test.fa'
102+ if not os .path .isfile (inputFasta ):
103+ if not os .path .isfile ('test-mask.tar.gz' ):
104+ download (download_links .get ('mask' ), 'test-mask.tar.gz' )
105+ subprocess .call (['tar' , '-zxf' , 'test-mask.tar.gz' ])
106+ shutil .copyfile (inputFasta , os .path .join (tmpdir , inputFasta ))
107+ runCMD (['funannotate' , 'mask' , '-i' , inputFasta , '-o' , 'test.masked.fa' , '--cpus' , str (args .cpus )], tmpdir )
108+ #check that everything worked
109+ assert checkFile (os .path .join (tmpdir , 'test.masked.fa' ))
110+ library = False
111+ for file in os .listdir (tmpdir ):
112+ if file .startswith ('repeatmodeler-library' ):
113+ assert checkFile (os .path .join (tmpdir , file ))
114+ library = True
115+ print ("#########################################################" )
116+ if library :
117+ print ('SUCCESS: `funannotate mask` test complete.' )
118+ shutil .rmtree (tmpdir )
119+ else :
120+ print ('ERROR: `funannotate mask` test failed, RepeatModeler or RepeatMasker not properly installed.' )
121+ print ("#########################################################\n " )
122+
123+ def runCleanTest ():
124+ print ("#########################################################" )
125+ print ('Running `funannotate clean` unit testing: minimap2 mediated assembly duplications' )
126+ tmpdir = 'test-clean_' + pid
127+ os .makedirs (tmpdir )
128+ inputFasta = 'test.clean.fa'
129+ if not os .path .isfile (inputFasta ):
130+ if not os .path .isfile ('test-clean.tar.gz' ):
131+ download (download_links .get ('clean' ), 'test-clean.tar.gz' )
132+ subprocess .call (['tar' , '-zxf' , 'test-clean.tar.gz' ])
133+ shutil .copyfile (inputFasta , os .path .join (tmpdir , inputFasta ))
134+ assert countfasta (os .path .join (tmpdir , inputFasta )) == 6
135+ #run exhaustive
136+ runCMD (['funannotate' , 'clean' , '-i' , inputFasta , '-o' , 'test.exhaustive.fa' , '--exhaustive' ], tmpdir )
137+ print ("#########################################################" )
138+ try :
139+ assert countfasta (os .path .join (tmpdir , 'test.exhaustive.fa' )) == 3
140+ print ('SUCCESS: `funannotate clean` test complete.' )
141+ shutil .rmtree (tmpdir )
142+ except AssertionError :
143+ print ('ERROR: `funannotate clean` test failed.' )
144+ print ("#########################################################\n " )
145+
146+
147+ def runPredictTest ():
148+ print ("#########################################################" )
149+ print ('Running `funannotate predict` unit testing' )
150+ tmpdir = 'test-predict_' + pid
151+ os .makedirs (tmpdir )
152+ inputFasta = 'test.softmasked.fa'
153+ protEvidence = 'protein.evidence.fasta'
154+ if not checkFile (inputFasta ) or not checkFile (protEvidence ):
155+ if not os .path .isfile ('test-predict.tar.gz' ):
156+ download (download_links .get ('predict' ), 'test-predict.tar.gz' )
157+ subprocess .call (['tar' , '-zxf' , 'test-predict.tar.gz' ])
158+ shutil .copyfile (inputFasta , os .path .join (tmpdir , inputFasta ))
159+ shutil .copyfile (protEvidence , os .path .join (tmpdir , protEvidence ))
160+ #run predict
161+ runCMD (['funannotate' , 'predict' , '-i' , inputFasta ,
162+ '--protein_evidence' , protEvidence ,
163+ '-o' , 'annotate' , '--augustus_species' , 'yeast' ,
164+ '--cpus' , str (args .cpus ), '--species' , "Awesome testicus" ], tmpdir )
165+ print ("#########################################################" )
166+ #check results
167+ try :
168+ assert 1500 <= countGFFgenes (os .path .join (tmpdir , 'annotate' , 'predict_results' , 'Awesome_testicus.gff3' )) <= 1700
169+ print ('SUCCESS: `funannotate predict` test complete.' )
170+ shutil .rmtree (tmpdir )
171+ except AssertionError :
172+ print ('ERROR: `funannotate predict` test failed - check logfiles' )
173+ print ("#########################################################\n " )
174+
175+ def runBuscoTest ():
176+ print ("#########################################################" )
177+ print ('Running `funannotate predict` BUSCO-mediated training unit testing' )
178+ #need to delete any pre-existing Augustus training data
179+ try :
180+ AUGUSTUS = os .environ ["AUGUSTUS_CONFIG_PATH" ]
181+ except KeyError :
182+ lib .log .error ("$AUGUSTUS_CONFIG_PATH environmental variable not found, set to continue." )
183+ return
184+ if os .path .isdir (os .path .join (AUGUSTUS , 'species' , 'awesome_busco' )):
185+ shutil .rmtree (os .path .join (AUGUSTUS , 'species' , 'awesome_busco' ))
186+ tmpdir = 'test-busco_' + pid
187+ os .makedirs (tmpdir )
188+ inputFasta = 'test.softmasked.fa'
189+ protEvidence = 'protein.evidence.fasta'
190+ if not checkFile (inputFasta ) or not checkFile (protEvidence ):
191+ if not os .path .isfile ('test-busco.tar.gz' ):
192+ download (download_links .get ('predict' ), 'test-busco.tar.gz' )
193+ subprocess .call (['tar' , '-zxf' , 'test-busco.tar.gz' ])
194+ shutil .copyfile (inputFasta , os .path .join (tmpdir , inputFasta ))
195+ shutil .copyfile (protEvidence , os .path .join (tmpdir , protEvidence ))
196+ #run predict
197+ runCMD (['funannotate' , 'predict' , '-i' , inputFasta ,
198+ '--protein_evidence' , protEvidence ,
199+ '-o' , 'annotate' , '--cpus' , str (args .cpus ),
200+ '--species' , "Awesome busco" ], tmpdir )
201+ print ("#########################################################" )
202+ #check results
203+ try :
204+ assert 1500 <= countGFFgenes (os .path .join (tmpdir , 'annotate' , 'predict_results' , 'Awesome_busco.gff3' )) <= 1700
205+ print ('SUCCESS: `funannotate predict` BUSCO-mediated training test complete.' )
206+ shutil .rmtree (tmpdir )
207+ except AssertionError :
208+ print ('ERROR: `funannotate predict` BUSCO-mediated training test failed - check logfiles' )
209+ print ("#########################################################\n " )
210+
211+ def runAnnotateTest ():
212+ print ("#########################################################" )
213+ print ('Running `funannotate annotate` unit testing' )
214+ tmpdir = 'test-annotate_' + pid
215+ os .makedirs (tmpdir )
216+ input = 'Genome_one.gbk'
217+ iprscan = 'genome_one.iprscan.xml'
218+ emapper = 'genome_one.emapper.annotations'
219+ if not checkFile (input ) or not checkFile (iprscan ) or not checkFile (emapper ):
220+ if not os .path .isfile ('test-annotate.tar.gz' ):
221+ download (download_links .get ('annotate' ), 'test-annotate.tar.gz' )
222+ subprocess .call (['tar' , '-zxf' , 'test-annotate.tar.gz' ])
223+ shutil .copyfile (input , os .path .join (tmpdir , input ))
224+ shutil .copyfile (iprscan , os .path .join (tmpdir , iprscan ))
225+ shutil .copyfile (emapper , os .path .join (tmpdir , emapper ))
226+ #run predict
227+ runCMD (['funannotate' , 'annotate' , '--genbank' , input ,
228+ '-o' , 'annotate' , '--cpus' , str (args .cpus ),
229+ '--iprscan' , iprscan ,
230+ '--eggnog' , emapper ], tmpdir )
231+ print ("#########################################################" )
232+ #check results
233+ try :
234+ assert checkFile (os .path .join (tmpdir , 'annotate' , 'annotate_results' , 'Genome_one.gbk' ))
235+ assert checkFile (os .path .join (tmpdir , 'annotate' , 'annotate_results' , 'Genome_one.sqn' ))
236+ assert checkFile (os .path .join (tmpdir , 'annotate' , 'annotate_results' , 'Genome_one.agp' ))
237+ assert checkFile (os .path .join (tmpdir , 'annotate' , 'annotate_results' , 'Genome_one.tbl' ))
238+ assert checkFile (os .path .join (tmpdir , 'annotate' , 'annotate_results' , 'Genome_one.annotations.txt' ))
239+ print ('SUCCESS: `funannotate annotate` test complete.' )
240+ shutil .rmtree (tmpdir )
241+ except AssertionError :
242+ print ('ERROR: `funannotate annotate` test failed - check logfiles' )
243+ print ("#########################################################\n " )
244+
245+ def runCompareTest ():
246+ print ("#########################################################" )
247+ print ('Running `funannotate compare` unit testing' )
248+ tmpdir = 'test-compare_' + pid
249+ os .makedirs (tmpdir )
250+ input1 = 'Genome_one.gbk'
251+ input2 = 'Genome_two.gbk'
252+ input3 = 'Genome_three.gbk'
253+ if not checkFile (input1 ) or not checkFile (input2 ) or not checkFile (input3 ):
254+ if not os .path .isfile ('test-compare.tar.gz' ):
255+ download (download_links .get ('compare' ), 'test-compare.tar.gz' )
256+ subprocess .call (['tar' , '-zxf' , 'test-compare.tar.gz' ])
257+ shutil .copyfile (input1 , os .path .join (tmpdir , input1 ))
258+ shutil .copyfile (input2 , os .path .join (tmpdir , input2 ))
259+ shutil .copyfile (input3 , os .path .join (tmpdir , input3 ))
260+ #run predict
261+ runCMD (['funannotate' , 'compare' ,
262+ '-i' , input1 , input2 , input3 ,
263+ '-o' , 'compare' , '--cpus' , str (args .cpus ),
264+ '--run_dnds' , 'estimate' , '--outgroup' , 'botrytis_cinerea.dikarya' ], tmpdir )
265+ print ("#########################################################" )
266+ #check results
267+ try :
268+ assert checkFile (os .path .join (tmpdir , 'compare' , 'index.html' ))
269+ assert checkFile (os .path .join (tmpdir , 'compare' , 'phylogeny.html' ))
270+ assert checkFile (os .path .join (tmpdir , 'compare.tar.gz' ))
271+ print ('SUCCESS: `funannotate compare` test complete.' )
272+ shutil .rmtree (tmpdir )
273+ except AssertionError :
274+ print ('ERROR: `funannotate compare` test failed - check logfiles' )
275+ print ("#########################################################\n " )
276+
277+ def runRNAseqTest ():
278+ print ("#########################################################" )
279+ print ('Running funannotate RNA-seq training/prediction unit testing' )
280+ #need to delete any pre-existing Augustus training data
281+ try :
282+ AUGUSTUS = os .environ ["AUGUSTUS_CONFIG_PATH" ]
283+ except KeyError :
284+ lib .log .error ("$AUGUSTUS_CONFIG_PATH environmental variable not found, set to continue." )
285+ return
286+ if os .path .isdir (os .path .join (AUGUSTUS , 'species' , 'awesome_rna' )):
287+ shutil .rmtree (os .path .join (AUGUSTUS , 'species' , 'awesome_rna' ))
288+ tmpdir = 'test-rna_seq_' + pid
289+ os .makedirs (tmpdir )
290+ inputFasta = 'test.softmasked.fa'
291+ protEvidence = 'protein.evidence.fasta'
292+ illumina = 'rna-seq.illumina.fastq.gz'
293+ nanopore = 'rna-seq.nanopore.fastq.gz'
294+ if not checkFile (inputFasta ) or not checkFile (protEvidence ) or not checkFile (illumina ) or not checkFile (nanopore ):
295+ if not os .path .isfile ('test-rna_seq.tar.gz' ):
296+ download (download_links .get ('rna-seq' ), 'test-rna_seq.tar.gz' )
297+ subprocess .call (['tar' , '-zxf' , 'test-rna_seq.tar.gz' ])
298+ for f in [inputFasta , protEvidence , illumina , nanopore ]:
299+ shutil .copyfile (f , os .path .join (tmpdir , f ))
300+ #run train
301+ runCMD (['funannotate' , 'train' , '-i' , inputFasta ,
302+ '--single' , illumina , '--nanopore_mrna' , nanopore ,
303+ '-o' , 'rna-seq' , '--cpus' , str (args .cpus ), '--jaccard_clip' ,
304+ '--species' , "Awesome rna" ], tmpdir )
305+ #run predict
306+ print ("#########################################################" )
307+ print ('Now running `funannotate predict` using RNA-seq training data' )
308+ runCMD (['funannotate' , 'predict' , '-i' , inputFasta ,
309+ '--protein_evidence' , protEvidence ,
310+ '-o' , 'rna-seq' , '--cpus' , str (args .cpus ),
311+ '--species' , "Awesome rna" ], tmpdir )
312+ #run update
313+ print ("#########################################################" )
314+ print ('Now running `funannotate update` to run PASA-mediated UTR addition and multiple transcripts' )
315+ runCMD (['funannotate' , 'update' , '-i' , 'rna-seq' ,
316+ '--cpus' , str (args .cpus )], tmpdir )
317+ print ("#########################################################" )
318+ #check results
319+ try :
320+ assert 1630 <= countGFFgenes (os .path .join (tmpdir , 'rna-seq' , 'update_results' , 'Awesome_rna.gff3' )) <= 1830
321+ print ('SUCCESS: funannotate RNA-seq training/prediction test complete.' )
322+ shutil .rmtree (tmpdir )
323+ except AssertionError :
324+ print ('ERROR: funannotate RNA-seq training/prediction test failed - check logfiles' )
325+ print ("#########################################################\n " )
326+
327+
328+ pid = str (os .getpid ())
329+ if 'clean' in args .tests or 'all' in args .tests :
330+ runCleanTest ()
331+ if 'mask' in args .tests or 'all' in args .tests :
332+ runMaskTest ()
333+ if 'predict' in args .tests or 'all' in args .tests :
334+ runPredictTest ()
335+ if 'busco' in args .tests or 'all' in args .tests :
336+ runBuscoTest ()
337+ if 'rna-seq' in args .tests or 'all' in args .tests :
338+ runRNAseqTest ()
339+ if 'annotate' in args .tests or 'all' in args .tests :
340+ runAnnotateTest ()
341+ if 'compare' in args .tests or 'all' in args .tests :
342+ runCompareTest ()
0 commit comments