Skip to content

Commit 0a6dd4a

Browse files
authored
Merge pull request #289 from X-lab-3D/development
Development
2 parents 8df6c64 + fbda6e8 commit 0a6dd4a

File tree

9 files changed

+3685
-51
lines changed

9 files changed

+3685
-51
lines changed

PANDORA/Database/Database.py

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
import json
55
from joblib import Parallel, delayed
66
import argparse
7-
import urllib
87

98
import PANDORA
109
from PANDORA import Template
@@ -19,9 +18,26 @@ def __init__(self):
1918
self.ref_MHCI_sequences = {}
2019
self.__IDs_list_MHCI = []
2120
self.__IDs_list_MHCII = []
21+
self.reverse = False
22+
23+
def __reverse(self):
    """Flip the orientation of every template stored in ``self.MHCII_data``.

    For each template:
      * the peptide is replaced by its reversed copy;
      * every anchor ``a`` is remapped to ``len(peptide) - a + 1`` and the
        resulting list is reversed (presumably anchors are 1-based
        positions along the peptide -- TODO confirm);
      * the template's ``reverse`` flag is toggled.
    """
    for template in self.MHCII_data.values():
        forward_peptide = template.peptide
        peptide_length = len(forward_peptide)

        template.peptide = forward_peptide[::-1]

        # Mirror each anchor around the peptide, then restore list order.
        mirrored_anchors = [peptide_length - position + 1 for position in template.anchors]
        mirrored_anchors.reverse()
        template.anchors = mirrored_anchors

        template.reverse = not template.reverse
29+
30+
def set_reverse(self, reverse):
    """Put the database in the requested orientation.

    The stored templates are flipped (via ``self.__reverse()``) only when
    the truthiness of ``reverse`` differs from the truthiness of the
    current ``self.reverse``; otherwise they are left untouched.
    ``self.reverse`` is then set to the value that was passed in.

    Args:
        reverse: Truthy to request the reversed orientation, falsy to
            request the forward orientation.
    """
    orientation_changes = bool(reverse) != bool(self.reverse)
    if orientation_changes:
        self.__reverse()
    self.reverse = reverse
2238

23-
def download_data(self, data_dir = f'{PANDORA.PANDORA_data}/database', download = True):
24-
"""download_data(self, data_dir = f'{PANDORA.PANDORA_data}/database', download = True)
39+
def download_data(self, data_dir = PANDORA.PANDORA_data, download = True):
40+
"""download_data(self, data_dir = PANDORA.PANDORA_data, download = True)
2541
Download all MHC structures and get two lists that contain all MHCI and MHCII IDs, respectively"""
2642

2743
if download:
@@ -68,12 +84,12 @@ def update_ref_sequences(self):
6884
Returns a dictionary that can be used to select the desired reference sequence"""
6985
self.ref_MHCI_sequences = Database_functions.generate_mhcseq_database()
7086

71-
def construct_database(self, save=PANDORA.PANDORA_data + '/database/PANDORA_database.pkl', data_dir = PANDORA.PANDORA_data,
87+
def construct_database(self, save=PANDORA.PANDORA_data + '/PANDORA_database.pkl', data_dir = PANDORA.PANDORA_data,
7288
MHCI=True, MHCII=True, download=True,
7389
update_ref_sequences=True,
7490
remove_biopython_objects = True,
7591
n_jobs = 1):
76-
'''construct_database(self, save=PANDORA.PANDORA_data + '/database/PANDORA_database.pkl', data_dir = PANDORA.PANDORA_data, MHCI=True, MHCII=True, download=True, update_ref_sequences=True, remove_biopython_objects = True, n_jobs = 1)
92+
'''construct_database(self, save=PANDORA.PANDORA_data + '/PANDORA_database.pkl', data_dir = PANDORA.PANDORA_data, MHCI=True, MHCII=True, download=True, update_ref_sequences=True, remove_biopython_objects = True, n_jobs = 1)
7793
Construct the database. Download, clean and add all structures
7894
7995
Args:
@@ -95,7 +111,7 @@ def construct_database(self, save=PANDORA.PANDORA_data + '/database/PANDORA_data
95111
'''
96112
#Generate the necessary folders
97113
create_db_folders()
98-
114+
99115
# Download the data
100116
self.download_data(download = download, data_dir = data_dir)
101117

@@ -281,21 +297,21 @@ def remove_structure(self, id =''):
281297
self.MHCI_data.pop(id, None)
282298
self.MHCII_data.pop(id, None)
283299

284-
def save(self, fn = PANDORA.PANDORA_data + '/database/PANDORA_database.pkl'):
300+
def save(self, fn = PANDORA.PANDORA_data + '/PANDORA_database.pkl'):
    """Pickle this database object to disk.

    :param fn: (str) pathname of the output file; it is opened in binary
        write mode, so an existing file at that path is overwritten
    """
    with open(fn, "wb") as out_handle:
        pickle.dump(self, out_handle)
291307

292-
def load(file_name = PANDORA.PANDORA_data + '/database/PANDORA_database.pkl'):
308+
def load(file_name = PANDORA.PANDORA_data + '/PANDORA_database.pkl'):
293309
"""Loads a pre-generated database
294310
295311
296312
Args:
297313
file_name (str): Database file name/path.
298-
Defaults to PANDORA.PANDORA_data + '/database/PANDORA_database.pkl'.
314+
Defaults to PANDORA.PANDORA_data + '/PANDORA_database.pkl'.
299315
300316
Returns:
301317
Database.Database: Database object.
@@ -307,6 +323,9 @@ def load(file_name = PANDORA.PANDORA_data + '/database/PANDORA_database.pkl'):
307323
try:
308324
with open(file_name, 'rb') as inpkl:
309325
db = pickle.load(inpkl)
326+
db.reverse = False
327+
for temp in db.MHCII_data:
328+
db.MHCII_data[temp].reverse = False
310329
return db
311330
except FileNotFoundError:
312331
raise Exception('Database file not found. Are you sure you have it? If not, run Database.construct_database()')
@@ -340,7 +359,6 @@ def create_db_folders(db_path=None):
340359
parent_db_path = ('/').join(db_path.split('/')[:-1])
341360
dirs = [parent_db_path,
342361
db_path,
343-
f'{db_path}/database'
344362
f'{db_path}/mhcseqs',
345363
f'{db_path}/BLAST_databases',
346364
f'{db_path}/PDBs',
@@ -361,45 +379,38 @@ def create_db_folders(db_path=None):
361379
else:
362380
print(f'WARNING: folder {D} already exists!')
363381

364-
def fetch_database(db_out_path, db_url='https://zenodo.org/records/6373630'):
365-
"""Downloads the pre-generated database from zotero.
382+
def fetch_database(db_out_path, db_url='https://surfdrive.surf.nl/files/index.php/s/D8f0n4ulfeZzsmJ/download'):
    """Download and unpack the pre-generated database.

    The archive at ``db_url`` is fetched with ``wget`` into the parent
    folder of ``db_out_path`` as ``default.tar.gz``, extracted there with
    ``tar`` and then deleted (also when the download or extraction fails).
    Finally the presence of ``PANDORA_database.pkl`` inside ``db_out_path``
    is verified.

    This function is best-effort: every failure, including a missing
    ``PANDORA_database.pkl`` after extraction, is reported as a printed
    warning and is NOT propagated to the caller.

    Args:
        db_out_path (str): Path to the database to be downloaded,
            should be pointing at a "PANDORA_databases" folder.
            A leading ``~`` is expanded to the user's home directory.
        db_url (str, optional): URL of the database archive.
            Defaults to 'https://surfdrive.surf.nl/files/index.php/s/D8f0n4ulfeZzsmJ/download'.

    Returns:
        None
    """
    # Local import so the block does not depend on a new file-level import.
    import subprocess

    try:
        # os.path.exists() does not expand '~', so resolve it up front.
        db_out_path = os.path.expanduser(db_out_path)
        # Fall back to the current directory when only a bare folder name is
        # given; an empty parent would otherwise target the filesystem root.
        parent_db_path = os.path.dirname(db_out_path) or '.'
        archive_path = os.path.join(parent_db_path, 'default.tar.gz')

        try:
            print('Downloading pre-built database ...')
            # Argument lists (no shell) keep paths/URLs with spaces or shell
            # metacharacters from being misinterpreted.
            subprocess.run(['wget', db_url, '-O', archive_path],
                           check=True, stdout=subprocess.DEVNULL)
            print('Copying the database')
            subprocess.run(['tar', '-xzvf', archive_path, '-C', parent_db_path],
                           check=True, stdout=subprocess.DEVNULL)
        finally:
            # Never leave a (possibly partial) archive behind.
            if os.path.exists(archive_path):
                os.remove(archive_path)

        print('Checking...')
        if os.path.exists(f'{db_out_path}/PANDORA_database.pkl'):
            print('Database correctly retrieved')
        else:
            print('ERROR: Something is missing from the retrieved database.')
            print('Please check the path you provided. Use Database.create_db_folders to generate the necessary folders.')
            raise Exception('Missing PANDORA_database.pkl')

    except Exception as e:
        print(f'WARNING: received error while installing database: {e}')
        print('To be able to use PANDORA you will have to generate a new database. Please follow the instructions in the README.')
404415

405416
def install_database(db_path='~/PANDORA_databases/default'):
@@ -410,4 +421,4 @@ def install_database(db_path='~/PANDORA_databases/default'):
410421
Defaults to '~/PANDORA_databases/default'.
411422
"""
412423
create_db_folders(db_path)
413-
fetch_database(db_out_path=db_path)
424+
fetch_database(db_out_path=db_path)

0 commit comments

Comments
 (0)