44import json
55from joblib import Parallel , delayed
66import argparse
7- import urllib
87
98import PANDORA
109from PANDORA import Template
@@ -19,9 +18,26 @@ def __init__(self):
1918 self .ref_MHCI_sequences = {}
2019 self .__IDs_list_MHCI = []
2120 self .__IDs_list_MHCII = []
21+ self .reverse = False
22+
def __reverse(self):
    """Flip the orientation of every MHCII template held by the database.

    For each template in ``self.MHCII_data``:

    * the peptide is replaced by its reversed copy;
    * every anchor ``a`` becomes ``len(peptide) - a + 1`` and the resulting
      anchor list is reversed;
    * the template's ``reverse`` flag is toggled.

    Calling this method twice restores the initial state.
    """
    for template in self.MHCII_data.values():
        original_peptide = template.peptide
        length = len(original_peptide)

        template.peptide = original_peptide[::-1]

        # Mirror each anchor around the peptide, then flip the list order.
        mirrored = [length - position + 1 for position in template.anchors]
        mirrored.reverse()
        template.anchors = mirrored

        template.reverse = not template.reverse
29+
def set_reverse(self, reverse):
    """Put the MHCII templates in the requested orientation.

    Args:
        reverse: Desired orientation. Only its truthiness is compared with
            the current ``self.reverse``; the value itself is then stored
            in ``self.reverse``.
    """
    # The templates are flipped only when the requested state differs from
    # the current one, so repeated calls with the same value do nothing.
    if bool(reverse) != bool(self.reverse):
        self.__reverse()
    self.reverse = reverse
2238
23- def download_data (self , data_dir = f' { PANDORA .PANDORA_data } /database' , download = True ):
24- """download_data(self, data_dir = f'{ PANDORA.PANDORA_data}/database' , download = True)
39+ def download_data (self , data_dir = PANDORA .PANDORA_data , download = True ):
40+ """download_data(self, data_dir = PANDORA.PANDORA_data, download = True)
2541 Download all MHC structures and get a two lists that contains all MHCI and MHCII IDs respectively"""
2642
2743 if download :
@@ -68,12 +84,12 @@ def update_ref_sequences(self):
6884 Returns a dictionary that can be used to select the desired reference sequence"""
6985 self .ref_MHCI_sequences = Database_functions .generate_mhcseq_database ()
7086
71- def construct_database (self , save = PANDORA .PANDORA_data + '/database/ PANDORA_database.pkl' , data_dir = PANDORA .PANDORA_data ,
87+ def construct_database (self , save = PANDORA .PANDORA_data + '/PANDORA_database.pkl' , data_dir = PANDORA .PANDORA_data ,
7288 MHCI = True , MHCII = True , download = True ,
7389 update_ref_sequences = True ,
7490 remove_biopython_objects = True ,
7591 n_jobs = 1 ):
76- '''construct_database(self, save=PANDORA.PANDORA_data + '/database/ PANDORA_database.pkl', data_dir = PANDORA.PANDORA_data, MHCI=True, MHCII=True, download=True, update_ref_sequences=True, remove_biopython_objects = True, n_jobs = 1)
92+ '''construct_database(self, save=PANDORA.PANDORA_data + '/PANDORA_database.pkl', data_dir = PANDORA.PANDORA_data, MHCI=True, MHCII=True, download=True, update_ref_sequences=True, remove_biopython_objects = True, n_jobs = 1)
7793 Construct the database. Download, clean and add all structures
7894
7995 Args:
@@ -95,7 +111,7 @@ def construct_database(self, save=PANDORA.PANDORA_data + '/database/PANDORA_data
95111 '''
96112 #Generate the necessary folders
97113 create_db_folders ()
98-
114+
99115 # Download the data
100116 self .download_data (download = download , data_dir = data_dir )
101117
@@ -281,21 +297,21 @@ def remove_structure(self, id =''):
281297 self .MHCI_data .pop (id , None )
282298 self .MHCII_data .pop (id , None )
283299
def save(self, fn=PANDORA.PANDORA_data + '/PANDORA_database.pkl'):
    """Serialize this database object to disk with pickle.

    :param fn: (str) path of the output file; it is opened in binary
        write mode.
    """
    with open(fn, mode="wb") as handle:
        pickle.dump(self, handle)
291307
292- def load (file_name = PANDORA .PANDORA_data + '/database/ PANDORA_database.pkl' ):
308+ def load (file_name = PANDORA .PANDORA_data + '/PANDORA_database.pkl' ):
293309 """Loads a pre-generated database
294310
295311
296312 Args:
297313 file_name (str): Dabase file name/path.
298- Defaults to PANDORA.PANDORA_data + '/database/ PANDORA_database.pkl'.
314+ Defaults to PANDORA.PANDORA_data + '/PANDORA_database.pkl'.
299315
300316 Returns:
301317 Database.Database: Database object.
@@ -307,6 +323,9 @@ def load(file_name = PANDORA.PANDORA_data + '/database/PANDORA_database.pkl'):
307323 try :
308324 with open (file_name , 'rb' ) as inpkl :
309325 db = pickle .load (inpkl )
326+ db .reverse = False
327+ for temp in db .MHCII_data :
328+ db .MHCII_data [temp ].reverse = False
310329 return db
311330 except FileNotFoundError :
312331 raise Exception ('Database file not found. Are you sure you have it? If not, run Database.construct_database()' )
@@ -340,7 +359,6 @@ def create_db_folders(db_path=None):
340359 parent_db_path = ('/' ).join (db_path .split ('/' )[:- 1 ])
341360 dirs = [parent_db_path ,
342361 db_path ,
343- f'{ db_path } /database'
344362 f'{ db_path } /mhcseqs' ,
345363 f'{ db_path } /BLAST_databases' ,
346364 f'{ db_path } /PDBs' ,
@@ -361,45 +379,38 @@ def create_db_folders(db_path=None):
361379 else :
362380 print (f'WARNING: folder { D } already exists!' )
363381
def fetch_database(db_out_path, db_url='https://surfdrive.surf.nl/files/index.php/s/D8f0n4ulfeZzsmJ/download'):
    """Download and unpack the pre-generated database.

    The archive is fetched with ``wget`` into the parent folder of
    ``db_out_path`` (as ``default.tar.gz``), extracted there with ``tar``
    and then deleted. Afterwards the presence of ``PANDORA_database.pkl``
    inside ``db_out_path`` is checked.

    Any failure (download, extraction or missing pickle file) is reported on
    stdout as a warning; no exception is propagated to the caller.

    Args:
        db_out_path (str): Path to the database to be downloaded,
            should be pointing at a "PANDORA_databases" folder.
            A leading ``~`` is expanded.
        db_url (str, optional): URL of the database archive.
            Defaults to 'https://surfdrive.surf.nl/files/index.php/s/D8f0n4ulfeZzsmJ/download'.

    Returns:
        None
    """
    # Imported locally because only this function needs it.
    import subprocess

    try:
        # Expand '~' and normalise, so that the existence check below and the
        # external tools all see the same concrete location (no shell is
        # involved any more, hence nothing else would expand '~').
        db_out_path = os.path.abspath(os.path.expanduser(db_out_path))
        parent_db_path = os.path.dirname(db_out_path)
        archive_path = os.path.join(parent_db_path, 'default.tar.gz')

        print('Downloading pre-built database...')
        try:
            # Argument lists instead of shell strings: paths containing
            # spaces or shell metacharacters are passed through verbatim,
            # and check=True turns a failed command into an exception.
            subprocess.run(['wget', db_url, '-O', archive_path],
                           stdout=subprocess.DEVNULL, check=True)
            print('Copying the database')
            subprocess.run(['tar', '-xzf', archive_path, '-C', parent_db_path],
                           stdout=subprocess.DEVNULL, check=True)
        finally:
            # Never leave a (possibly partial) archive behind.
            if os.path.exists(archive_path):
                os.remove(archive_path)

        print('Checking...')
        if os.path.exists(os.path.join(db_out_path, 'PANDORA_database.pkl')):
            print('Database correctly retrieved')
        else:
            print('ERROR: Something is missing from the retrieved database.')
            print('Please check the path you provided. Use Database.create_db_folders to generate the necessary folders.')
            raise Exception('Missing PANDORA_database.pkl')

    except Exception as e:
        # Deliberately best-effort: installation must not crash the caller,
        # the user is told how to build a database instead.
        print(f'WARNING: received error while installing database: {e}')
        print('To be able to use PANDORA you will have to generate a new database. Please follow the instructions in the README.')
404415
405416def install_database (db_path = '~/PANDORA_databases/default' ):
@@ -410,4 +421,4 @@ def install_database(db_path='~/PANDORA_databases/default'):
410421 Defaults to '~/PANDORA_databases/default'.
411422 """
412423 create_db_folders (db_path )
413- fetch_database (db_out_path = db_path )
424+ fetch_database (db_out_path = db_path )
0 commit comments