Skip to content

Commit 74c367f

Browse files
Jon Palmer
authored and committed
fix final annotation table; orthologs and transfactors
1 parent 1ba402a commit 74c367f

File tree

2 files changed

+17
-6
lines changed

2 files changed

+17
-6
lines changed

bin/funannotate-compare.py

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,6 @@ def __init__(self,prog):
157157
eggnog.append(lib.getEggNogfromNote(GBK))
158158
scinames.append(stats[i][0].replace(' ', '_'))
159159

160-
161160
#convert busco to dictionary
162161
busco = lib.busco_dictFlip(busco)
163162

@@ -515,8 +514,9 @@ def __init__(self,prog):
515514
os.makedirs(os.path.join(args.out, 'tfs'))
516515
#should be able to pull transcription factor counts from InterPro Domains, load into pandas df
517516
iprTF = os.path.join(parentdir, 'lib', 'tf_interpro.txt')
518-
519517
tf = pd.read_csv(iprTF, names=['InterPro', 'Description'])
518+
#convert to dictionary for all annotations later
519+
TFDict = tf.set_index('InterPro')['Description'].to_dict()
520520
iprall = IPRdf.transpose()
521521
iprall.reset_index(inplace=True)
522522
dfmerged = pd.merge(tf,iprall, left_on='InterPro', right_on='index', how='left')
@@ -756,7 +756,7 @@ def __init__(self,prog):
756756
for line in input:
757757
line = line.replace('\n', '')
758758
col = line.split('\t')
759-
genes = col[1].split(',')
759+
genes = col[-1].split(', ')
760760
for i in genes:
761761
orthoDict[i] = col[0]
762762

@@ -785,9 +785,16 @@ def __init__(self,prog):
785785
meropsDict = lib.dictFlip(merops)
786786
cazyDict = lib.dictFlip(cazy)
787787

788+
#get Transcription factors in a dictionary
789+
TFLookup = {}
790+
for k,v in iprDict.items():
791+
for x in v:
792+
IPRid = x.split(':')[0]
793+
if IPRid in TFDict:
794+
TFLookup[k] = TFDict.get(IPRid)
788795

789796
table = []
790-
header = ['GeneID','scaffold:start-end','strand','length','description', 'Ortho Group', 'EggNog', 'BUSCO', 'Secreted', 'Protease family', 'CAZyme family', 'InterPro Domains', 'PFAM Domains', 'GO terms', 'SecMet Cluster', 'SMCOG']
797+
header = ['GeneID','scaffold:start-end','strand','length','description', 'Ortho Group', 'EggNog', 'BUSCO', 'Secreted', 'Protease family', 'CAZyme family', 'Transcription factor', 'InterPro Domains', 'PFAM Domains', 'GO terms', 'SecMet Cluster', 'SMCOG']
791798
for y in range(0,num_input):
792799
outputname = os.path.join(args.out, 'annotations', scinames[y]+'.all.annotations.tsv')
793800
with open(outputname, 'w') as output:
@@ -843,6 +850,10 @@ def __init__(self,prog):
843850
orthogroup = orthoDict.get(ID)
844851
else:
845852
orthogroup = ''
853+
if ID in TFLookup:
854+
transfactor = TFLookup.get(ID)
855+
else:
856+
transfactor = ''
846857
for k,v in f.qualifiers.items():
847858
if k == 'note':
848859
notes = v[0].split('; ')
@@ -855,7 +866,7 @@ def __init__(self,prog):
855866
if i.startswith('SMCOG:'):
856867
smcog = i
857868

858-
final_result = [ID, location, strand, str(length), description, orthogroup, egg, buscogroup, signalphit, meropsdomains, cazydomains, IPRdomains, pfamdomains, goTerms, cluster, smcog]
869+
final_result = [ID, location, strand, str(length), description, orthogroup, egg, buscogroup, signalphit, meropsdomains, cazydomains, transfactor, IPRdomains, pfamdomains, goTerms, cluster, smcog]
859870
output.write("%s\n" % ('\t'.join(final_result)))
860871
############################################
861872

funannotate.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@ def fmtcols(mylist, cols):
3131
for i in range(0,num_lines))
3232
return "\n".join(lines)
3333

34-
version = '0.3.8'
34+
version = '0.3.9'
3535

3636
default_help = """
3737
Usage: funannotate <command> <arguments>

0 commit comments

Comments (0)