@@ -654,9 +654,9 @@ def parseEggNoggMapper(input, output, GeneDict):
654654RawProductNames = os .path .join (outputdir , 'annotate_misc' , 'uniprot_eggnog_raw_names.txt' )
655655#GeneDict[ID] = [{'name': passname, 'product': final_desc}]
656656with open (RawProductNames , 'w' ) as uniprottmp :
657- for k ,v in natsorted (GeneProducts .items ()):
658- for x in v : #v is list of dictionaries
659- uniprottmp .write ('{:}\t {:}\t {:}\t {:}\n ' .format (k , x ['name' ], x ['product' ], x ['source' ]))
657+ for k ,v in natsorted (GeneProducts .items ()):
658+ for x in v : #v is list of dictionaries
659+ uniprottmp .write ('{:}\t {:}\t {:}\t {:}\n ' .format (k , x ['name' ], x ['product' ], x ['source' ]))
660660
661661#combine the results from UniProt and Eggnog to parse Gene names and product descriptions
662662#load curated list
@@ -1047,10 +1047,17 @@ def parseEggNoggMapper(input, output, GeneDict):
10471047 lib .log .info ("Cross referencing SM cluster hits with MIBiG database version %s" % versDB .get ('mibig' ))
10481048 #do a blast best hit search against MIBiG database for cluster annotation, but looping through gene cluster hits
10491049 AllProts = []
1050+ SMgenes = []
10501051 for k , v in lib .dictClusters .items ():
10511052 for i in v :
1053+ if '-T' in i :
1054+ ID = i .split ('-T' )[0 ]
1055+ else :
1056+ ID = i
10521057 if not i in AllProts :
10531058 AllProts .append (i )
1059+ if not ID in SMgenes :
1060+ SMgenes .append (ID )
10541061 AllProts = set (AllProts )
10551062 mibig_fasta = os .path .join (AntiSmashFolder , 'smcluster.proteins.fasta' )
10561063 mibig_blast = os .path .join (AntiSmashFolder , 'smcluster.MIBiG.blast.txt' )
@@ -1071,9 +1078,9 @@ def parseEggNoggMapper(input, output, GeneDict):
10711078 for line in input :
10721079 cols = line .split ('\t ' )
10731080 if '-T' in cols [0 ]:
1074- ID = cols [0 ].split ('-T' )[0 ]
1081+ ID = cols [0 ].split ('-T' )[0 ]
10751082 else :
1076- ID = cols [0 ]
1083+ ID = cols [0 ]
10771084 hit = cols [1 ].split ('|' )
10781085 desc = hit [5 ]
10791086 cluster = hit [0 ]
@@ -1151,7 +1158,10 @@ def parseEggNoggMapper(input, output, GeneDict):
11511158 pFAM = []
11521159 IPR = []
11531160 eggnogDesc = 'NA'
1154- location = 'flanking'
1161+ if name in SMgenes :
1162+ location = 'cluster'
1163+ else :
1164+ location = 'flanking'
11551165 cog = '.'
11561166 for k ,v in f .qualifiers .items ():
11571167 if k == 'note' :
@@ -1166,8 +1176,6 @@ def parseEggNoggMapper(input, output, GeneDict):
11661176 goTerms .append (goterm )
11671177 elif i .startswith ('SMCOG' ):
11681178 cog = i
1169- elif i .startswith ('antiSMASH:' ):
1170- location = 'cluster'
11711179 else :
11721180 note .append (i )
11731181 if k == 'db_xref' :
0 commit comments