Skip to content

Commit 617f92d

Browse files
CFMR NGS
authored and committed
fix cluster/flanking annotation of SM genes
1 parent db7c8e9 commit 617f92d

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed

bin/funannotate-functional.py

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -654,9 +654,9 @@ def parseEggNoggMapper(input, output, GeneDict):
654654
RawProductNames = os.path.join(outputdir, 'annotate_misc', 'uniprot_eggnog_raw_names.txt')
655655
#GeneDict[ID] = [{'name': passname, 'product': final_desc}]
656656
with open(RawProductNames, 'w') as uniprottmp:
657-
for k,v in natsorted(GeneProducts.items()):
658-
for x in v: #v is list of dictionaries
659-
uniprottmp.write('{:}\t{:}\t{:}\t{:}\n'.format(k, x['name'], x['product'], x['source']))
657+
for k,v in natsorted(GeneProducts.items()):
658+
for x in v: #v is list of dictionaries
659+
uniprottmp.write('{:}\t{:}\t{:}\t{:}\n'.format(k, x['name'], x['product'], x['source']))
660660

661661
#combine the results from UniProt and Eggnog to parse Gene names and product descriptions
662662
#load curated list
@@ -1047,10 +1047,17 @@ def parseEggNoggMapper(input, output, GeneDict):
10471047
lib.log.info("Cross referencing SM cluster hits with MIBiG database version %s" % versDB.get('mibig'))
10481048
#do a blast best hit search against MIBiG database for cluster annotation, but looping through gene cluster hits
10491049
AllProts = []
1050+
SMgenes = []
10501051
for k, v in lib.dictClusters.items():
10511052
for i in v:
1053+
if '-T' in i:
1054+
ID = i.split('-T')[0]
1055+
else:
1056+
ID = i
10521057
if not i in AllProts:
10531058
AllProts.append(i)
1059+
if not ID in SMgenes:
1060+
SMgenes.append(ID)
10541061
AllProts = set(AllProts)
10551062
mibig_fasta = os.path.join(AntiSmashFolder, 'smcluster.proteins.fasta')
10561063
mibig_blast = os.path.join(AntiSmashFolder, 'smcluster.MIBiG.blast.txt')
@@ -1071,9 +1078,9 @@ def parseEggNoggMapper(input, output, GeneDict):
10711078
for line in input:
10721079
cols = line.split('\t')
10731080
if '-T' in cols[0]:
1074-
ID = cols[0].split('-T')[0]
1081+
ID = cols[0].split('-T')[0]
10751082
else:
1076-
ID = cols[0]
1083+
ID = cols[0]
10771084
hit = cols[1].split('|')
10781085
desc = hit[5]
10791086
cluster = hit[0]
@@ -1151,7 +1158,10 @@ def parseEggNoggMapper(input, output, GeneDict):
11511158
pFAM = []
11521159
IPR = []
11531160
eggnogDesc = 'NA'
1154-
location = 'flanking'
1161+
if name in SMgenes:
1162+
location = 'cluster'
1163+
else:
1164+
location = 'flanking'
11551165
cog = '.'
11561166
for k,v in f.qualifiers.items():
11571167
if k == 'note':
@@ -1166,8 +1176,6 @@ def parseEggNoggMapper(input, output, GeneDict):
11661176
goTerms.append(goterm)
11671177
elif i.startswith('SMCOG'):
11681178
cog = i
1169-
elif i.startswith('antiSMASH:'):
1170-
location = 'cluster'
11711179
else:
11721180
note.append(i)
11731181
if k == 'db_xref':

0 commit comments

Comments
 (0)