@@ -36,7 +36,7 @@ def MEROPSBlast(input, cpus, evalue, tmpdir, output, diamond=True):
3636 lib .runSubprocess (cmd , '.' , lib .log )
3737 # parse results
3838 with open (output , 'w' ) as out :
39- with open (blast_tmp , 'rU ' ) as results :
39+ with open (blast_tmp , 'r ' ) as results :
4040 for qresult in SearchIO .parse (results , "blast-xml" ):
4141 hits = qresult .hits
4242 ID = qresult .id
@@ -66,7 +66,7 @@ def SwissProtBlast(input, cpus, evalue, tmpdir, GeneDict, diamond=True):
6666 # parse results
6767 counter = 0
6868 total = 0
69- with open (blast_tmp , 'rU ' ) as results :
69+ with open (blast_tmp , 'r ' ) as results :
7070 for qresult in SearchIO .parse (results , "blast-xml" ):
7171 hits = qresult .hits
7272 qlen = qresult .seq_len
@@ -155,7 +155,7 @@ def getEggNogHeaders(input):
155155 12 eggNOG annot
156156 '''
157157 IDi , DBi , OGi , Genei , COGi , Desci = (None ,)* 6
158- with open (input , 'rU ' ) as infile :
158+ with open (input , 'r ' ) as infile :
159159 for line in infile :
160160 if line .startswith ('#query_name' ): # this is HEADER
161161 line = line .rstrip ()
@@ -171,14 +171,57 @@ def getEggNogHeaders(input):
171171 IDi , DBi , OGi , Genei , COGi , Desci = (0 , 8 , 9 , 4 , 11 , 12 )
172172 return IDi , DBi , OGi , Genei , COGi , Desci
173173
def getEggNogHeadersv2(input):
    '''
    Find the column indexes of interest in an eggnog-mapper v2 annotations file.

    The file is scanned for a header line starting with '#query_name'; when
    one is found, each column index is looked up by name with item2index().
    Any column that cannot be resolved from a header (including the case
    where the file has no header at all, e.g. web-based eggnog-mapper output)
    falls back to a hard-coded positional guess.

    Expected eggnog-mapper v2 column layout (1-based):
     1. query_name
     2. seed eggNOG ortholog
     3. seed ortholog evalue
     4. seed ortholog score
     5. Predicted taxonomic group
     6. Predicted protein name
     7. Gene Ontology terms
     8. EC number
     9. KEGG_ko
    10. KEGG_Pathway
    11. KEGG_Module
    12. KEGG_Reaction
    13. KEGG_rclass
    14. BRITE
    15. KEGG_TC
    16. CAZy
    17. BiGG Reaction
    18. tax_scope: eggNOG taxonomic level used for annotation
    19. eggNOG OGs
    20. bestOG (deprecated, use smallest from eggnog OGs)
    21. COG Functional Category
    22. eggNOG free text description

    Parameters:
        input: path to the eggnog-mapper annotations file.

    Returns:
        tuple (IDi, DBi, OGi, Genei, COGi, Desci) of 0-based column indexes.
    '''
    # NOTE(review): these fallback positions are carried over unchanged; they
    # do not line up with the 22-column layout listed in the docstring
    # (e.g. 'eggNOG OGs' would be index 18) -- confirm against real
    # header-less v2 output before relying on them.
    fallback = (0, 6, 9, 4, 11, 12)
    IDi, DBi, OGi, Genei, COGi, Desci = (None,)*6
    with open(input, 'r') as infile:
        for line in infile:
            if line.startswith('#query_name'):  # this is HEADER
                headerCols = line.rstrip().split('\t')
                IDi = item2index(headerCols, 'query_name')
                Genei = item2index(headerCols, 'Preferred_name')
                DBi = item2index(headerCols, 'taxonomic scope')
                OGi = item2index(headerCols, 'eggNOG OGs')
                COGi = item2index(headerCols, 'COG Functional cat.')
                Desci = item2index(headerCols, 'eggNOG free text desc.')
                break
    # Compare against None explicitly: index 0 is a valid (and, for the ID
    # column, the expected) result, so a truthiness test would wrongly throw
    # away every header-derived index.
    found = (IDi, DBi, OGi, Genei, COGi, Desci)
    IDi, DBi, OGi, Genei, COGi, Desci = (
        guess if idx is None else idx for idx, guess in zip(found, fallback))
    return IDi, DBi, OGi, Genei, COGi, Desci
174217
175218def parseEggNoggMapper (input , output , GeneDict ):
176219 Definitions = {}
177220 # indexes from header file
178221 IDi , DBi , OGi , Genei , COGi , Desci = getEggNogHeaders (input )
179222 # take annotations file from eggnog-mapper and create annotations
180223 with open (output , 'w' ) as out :
181- with open (input , 'rU ' ) as infile :
224+ with open (input , 'r ' ) as infile :
182225 for line in infile :
183226 line = line .replace ('\n ' , '' )
184227 if line .startswith ('#' ):
@@ -330,7 +373,7 @@ def __init__(self, prog):
330373 lib .log .error ('Database not properly configured, %s missing. Run funannotate database and/or funannotate setup.' %
331374 os .path .join (FUNDB , 'funannotate-db-info.txt' ))
332375 sys .exit (1 )
333- with open (os .path .join (FUNDB , 'funannotate-db-info.txt' ), 'rU ' ) as dbfile :
376+ with open (os .path .join (FUNDB , 'funannotate-db-info.txt' ), 'r ' ) as dbfile :
334377 for line in dbfile :
335378 line = line .strip ()
336379 name , type , file , version , date , num_records , mdchecksum = line .split (
@@ -526,7 +569,7 @@ def __init__(self, prog):
526569 genbank )
527570 # since can't find a way to propage the WGS_accession, writing to a file and then parse here
528571 if os .path .isfile (os .path .join (outputdir , 'update_results' , 'WGS_accession.txt' )):
529- with open (os .path .join (outputdir , 'update_results' , 'WGS_accession.txt' ), 'rU ' ) as infile :
572+ with open (os .path .join (outputdir , 'update_results' , 'WGS_accession.txt' ), 'r ' ) as infile :
530573 for line in infile :
531574 line = line .replace ('\n ' , '' )
532575 if line == 'None' :
@@ -646,7 +689,7 @@ def __init__(self, prog):
646689 lib .log .info ("Combining UniProt/EggNog gene and product names using Gene2Product version %s" %
647690 versDB .get ('gene2product' ))
648691 CuratedNames = {}
649- with open (os .path .join (FUNDB , 'ncbi_cleaned_gene_products.txt' ), 'rU ' ) as input :
692+ with open (os .path .join (FUNDB , 'ncbi_cleaned_gene_products.txt' ), 'r ' ) as input :
650693 for line in input :
651694 line = line .strip ()
652695 if line .startswith ('#' ):
@@ -937,7 +980,7 @@ def __init__(self, prog):
937980
938981 # to update annotations, user can pass --fix or --remove, update Annotations here
939982 if args .fix :
940- with open (args .fix , 'rU ' ) as fixfile :
983+ with open (args .fix , 'r ' ) as fixfile :
941984 for line in fixfile :
942985 line = line .strip ()
943986 if line .startswith ('#' ):
@@ -958,7 +1001,7 @@ def __init__(self, prog):
9581001 Gene2ProdFinal [cols [0 ]] = (cols [1 ], cols [2 ])
9591002
9601003 if args .remove :
961- with open (args .remove , 'rU ' ) as removefile :
1004+ with open (args .remove , 'r ' ) as removefile :
9621005 for line in removefile :
9631006 line = line .strip ()
9641007 if line .startswith ('#' ):
@@ -988,13 +1031,13 @@ def __init__(self, prog):
9881031 if args .p2g :
9891032 p2gfile = args .p2g
9901033 if p2gfile :
991- with open (p2gfile , 'rU ' ) as input :
1034+ with open (p2gfile , 'r ' ) as input :
9921035 for line in input :
9931036 cols = line .split ('\t ' )
9941037 if not cols [0 ] in p2g :
9951038 p2g [cols [0 ]] = cols [1 ]
9961039 with open (os .path .join (outputdir , 'annotate_misc' , 'tbl2asn' , 'genome.tbl' ), 'w' ) as outfile :
997- with open (os .path .join (outputdir , 'annotate_misc' , 'tbl2asn' , 'genome.tbl.bak' ), 'rU ' ) as infile :
1040+ with open (os .path .join (outputdir , 'annotate_misc' , 'tbl2asn' , 'genome.tbl.bak' ), 'r ' ) as infile :
9981041 for line in infile :
9991042 line = line .replace ('\n ' , '' )
10001043 if line .startswith ('\t \t \t protein_id' ) or line .startswith ('\t \t \t transcript_id' ):
@@ -1147,7 +1190,7 @@ def __init__(self, prog):
11471190 AntiSmashFolder , 'smcluster.MIBiG.blast.txt' )
11481191 mibig_db = os .path .join (FUNDB , 'mibig.dmnd' )
11491192 with open (mibig_fasta , 'w' ) as output :
1150- with open (Proteins , 'rU ' ) as input :
1193+ with open (Proteins , 'r ' ) as input :
11511194 SeqRecords = SeqIO .parse (Proteins , 'fasta' )
11521195 for record in SeqRecords :
11531196 genename = record .id
@@ -1160,7 +1203,7 @@ def __init__(self, prog):
11601203 lib .runSubprocess (cmd , '.' , lib .log )
11611204 # now parse blast results to get {qseqid: hit}
11621205 MIBiGBlast = {}
1163- with open (mibig_blast , 'rU ' ) as input :
1206+ with open (mibig_blast , 'r ' ) as input :
11641207 for line in input :
11651208 cols = line .split ('\t ' )
11661209 if '-T' in cols [0 ]:
@@ -1180,15 +1223,15 @@ def __init__(self, prog):
11801223
11811224 # load in antismash cluster bed file to slice record
11821225 slicing = []
1183- with open (AntiSmashBed , 'rU ' ) as antibed :
1226+ with open (AntiSmashBed , 'r ' ) as antibed :
11841227 for line in antibed :
11851228 cols = line .split ('\t ' )
11861229 # chr, cluster, start, stop in a tuple
11871230 cluster = (cols [0 ], cols [3 ], cols [1 ], cols [2 ])
11881231 slicing .append (cluster )
11891232 Offset = {}
11901233 # Get each cluster + 15 Kb in each direction to make sure you can see the context of the cluster
1191- with open (os .path .join (ResultsFolder , organism_name + '.gbk' ), 'rU ' ) as gbk :
1234+ with open (os .path .join (ResultsFolder , organism_name + '.gbk' ), 'r ' ) as gbk :
11921235 SeqRecords = SeqIO .parse (gbk , 'genbank' )
11931236 for record in SeqRecords :
11941237 for f in record .features :
@@ -1221,7 +1264,7 @@ def __init__(self, prog):
12211264 output .write ("#%s\n " % base )
12221265 output .write (
12231266 "#GeneID\t Chromosome:start-stop\t Strand\t ClusterPred\t Backbone Enzyme\t Backbone Domains\t Product\t smCOGs\t EggNog\t InterPro\t PFAM\t GO terms\t Notes\t MIBiG Blast\t Protein Seq\t DNA Seq\n " )
1224- with open (file , 'rU ' ) as input :
1267+ with open (file , 'r ' ) as input :
12251268 SeqRecords = SeqIO .parse (input , 'genbank' )
12261269 for record in SeqRecords :
12271270 for f in record .features :
@@ -1333,7 +1376,7 @@ def __init__(self, prog):
13331376 finallist .append (file )
13341377 with open (ClustersOut , 'w' ) as output :
13351378 for file in natsorted (finallist ):
1336- with open (file , 'rU ' ) as input :
1379+ with open (file , 'r ' ) as input :
13371380 output .write (input .read ())
13381381 output .write ('\n \n ' )
13391382
0 commit comments