Skip to content

Commit

Permalink
Merge pull request #289 from X-lab-3D/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
DTRademaker authored Oct 16, 2024
2 parents 8df6c64 + fbda6e8 commit 0a6dd4a
Show file tree
Hide file tree
Showing 9 changed files with 3,685 additions and 51 deletions.
63 changes: 37 additions & 26 deletions PANDORA/Database/Database.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import json
from joblib import Parallel, delayed
import argparse
import urllib

import PANDORA
from PANDORA import Template
Expand All @@ -19,9 +18,26 @@ def __init__(self):
self.ref_MHCI_sequences = {}
self.__IDs_list_MHCI = []
self.__IDs_list_MHCII = []
self.reverse = False

def __reverse(self):
    """Flip the orientation of every MHCII entry in place.

    For each object stored in self.MHCII_data the peptide string is
    reversed, every anchor position p is remapped to len(peptide) - p + 1
    and the anchor list order is reversed, and the entry's boolean
    ``reverse`` flag is toggled. Calling this twice restores the
    original values.
    """
    for entry in self.MHCII_data.values():
        forward_peptide = entry.peptide
        length = len(forward_peptide)
        entry.peptide = forward_peptide[::-1]
        # Mirror each anchor around the peptide, then reverse the list order.
        mirrored = [length - position + 1 for position in entry.anchors]
        mirrored.reverse()
        entry.anchors = mirrored
        entry.reverse = not entry.reverse

def set_reverse(self, reverse):
    """Put the database in the requested orientation.

    The MHCII entries are flipped (via self.__reverse()) only when the
    truthiness of ``reverse`` differs from the truthiness of the current
    self.reverse; otherwise nothing is touched. In both cases
    self.reverse is then set to the value that was passed in.

    Args:
        reverse: Truthy to request the reversed orientation, falsy for
            the non-reversed one.
    """
    requested = bool(reverse)
    current = bool(self.reverse)
    if requested != current:
        self.__reverse()
    self.reverse = reverse

def download_data(self, data_dir = f'{PANDORA.PANDORA_data}/database', download = True):
"""download_data(self, data_dir = f'{PANDORA.PANDORA_data}/database', download = True)
def download_data(self, data_dir = PANDORA.PANDORA_data, download = True):
"""download_data(self, data_dir = PANDORA.PANDORA_data, download = True)
Download all MHC structures and get a two lists that contains all MHCI and MHCII IDs respectively"""

if download:
Expand Down Expand Up @@ -68,12 +84,12 @@ def update_ref_sequences(self):
Returns a dictionary that can be used to select the desired reference sequence"""
self.ref_MHCI_sequences = Database_functions.generate_mhcseq_database()

def construct_database(self, save=PANDORA.PANDORA_data + '/database/PANDORA_database.pkl', data_dir = PANDORA.PANDORA_data,
def construct_database(self, save=PANDORA.PANDORA_data + '/PANDORA_database.pkl', data_dir = PANDORA.PANDORA_data,
MHCI=True, MHCII=True, download=True,
update_ref_sequences=True,
remove_biopython_objects = True,
n_jobs = 1):
'''construct_database(self, save=PANDORA.PANDORA_data + '/database/PANDORA_database.pkl', data_dir = PANDORA.PANDORA_data, MHCI=True, MHCII=True, download=True, update_ref_sequences=True, remove_biopython_objects = True, n_jobs = 1)
'''construct_database(self, save=PANDORA.PANDORA_data + '/PANDORA_database.pkl', data_dir = PANDORA.PANDORA_data, MHCI=True, MHCII=True, download=True, update_ref_sequences=True, remove_biopython_objects = True, n_jobs = 1)
Construct the database. Download, clean and add all structures
Args:
Expand All @@ -95,7 +111,7 @@ def construct_database(self, save=PANDORA.PANDORA_data + '/database/PANDORA_data
'''
#Generate the necessary folders
create_db_folders()

# Download the data
self.download_data(download = download, data_dir = data_dir)

Expand Down Expand Up @@ -281,21 +297,21 @@ def remove_structure(self, id =''):
self.MHCI_data.pop(id, None)
self.MHCII_data.pop(id, None)

def save(self, fn = PANDORA.PANDORA_data + '/PANDORA_database.pkl'):
    """Write this database object to disk as a pickle file.

    :param fn: (str) path of the pickle file to create or overwrite
    """
    with open(fn, mode="wb") as handle:
        pickle.dump(obj=self, file=handle)

def load(file_name = PANDORA.PANDORA_data + '/database/PANDORA_database.pkl'):
def load(file_name = PANDORA.PANDORA_data + '/PANDORA_database.pkl'):
"""Loads a pre-generated database
Args:
file_name (str): Dabase file name/path.
Defaults to PANDORA.PANDORA_data + '/database/PANDORA_database.pkl'.
Defaults to PANDORA.PANDORA_data + '/PANDORA_database.pkl'.
Returns:
Database.Database: Database object.
Expand All @@ -307,6 +323,9 @@ def load(file_name = PANDORA.PANDORA_data + '/database/PANDORA_database.pkl'):
try:
with open(file_name, 'rb') as inpkl:
db = pickle.load(inpkl)
db.reverse = False
for temp in db.MHCII_data:
db.MHCII_data[temp].reverse = False
return db
except FileNotFoundError:
raise Exception('Database file not found. Are you sure you have it? If not, run Database.construct_database()')
Expand Down Expand Up @@ -340,7 +359,6 @@ def create_db_folders(db_path=None):
parent_db_path = ('/').join(db_path.split('/')[:-1])
dirs = [parent_db_path,
db_path,
f'{db_path}/database'
f'{db_path}/mhcseqs',
f'{db_path}/BLAST_databases',
f'{db_path}/PDBs',
Expand All @@ -361,45 +379,38 @@ def create_db_folders(db_path=None):
else:
print(f'WARNING: folder {D} already exists!')

def fetch_database(db_out_path, db_url='https://surfdrive.surf.nl/files/index.php/s/D8f0n4ulfeZzsmJ/download'):
    """Downloads and unpacks the pre-generated database.

    The archive at ``db_url`` is downloaded with ``wget`` into the parent
    folder of ``db_out_path`` as ``default.tar.gz``, extracted there with
    ``tar`` and then deleted with ``rm``. Finally the presence of
    ``PANDORA_database.pkl`` inside ``db_out_path`` is checked.

    Args:
        db_out_path (str): Path to the database to be downloaded,
            should be pointing at a "PANDORA_databases" folder.
        db_url (str, optional): URL database.
            Defaults to 'https://surfdrive.surf.nl/files/index.php/s/D8f0n4ulfeZzsmJ/download'.

    Note:
        This function does not propagate errors. If PANDORA_database.pkl is
        missing after extraction (or anything else raises), the exception is
        caught here and reported as a printed warning.
    """

    try:
        # The archive is downloaded to, and extracted in, the parent folder.
        parent_db_path = ('/').join(db_out_path.split('/')[:-1])

        # NOTE(review): the commands below are shell strings built from the
        # given paths/URL; values containing spaces or shell metacharacters
        # will not be handled correctly.
        print('Downloading pre-built database ...')
        os.popen(f'wget {db_url} -O {parent_db_path}/default.tar.gz').read()
        print('Copying the database')
        os.popen(f'tar -xzvf {parent_db_path}/default.tar.gz -C {parent_db_path}').read()
        os.popen(f'rm {parent_db_path}/default.tar.gz').read()
        print('Checking...')
        # Success means the pickle IS present (the check used to be inverted).
        if os.path.exists(f'{db_out_path}/PANDORA_database.pkl'):
            print('Database correctly retrieved')
        else:
            print('ERROR: Something is missing from the retrieved database.')
            print('Please check the path you provided. Use Database.create_db_folders to generate the necessary folders.')
            raise Exception('Missing PANDORA_database.pkl')

    except Exception as e:
        print(f'WARNING: received error while installing database: {e}')
        print('To be able to use PANDORA you will have to generate a new database. Please follow the instructions in the README.')

def install_database(db_path='~/PANDORA_databases/default'):
Expand All @@ -410,4 +421,4 @@ def install_database(db_path='~/PANDORA_databases/default'):
Defaults to '~/PANDORA_databases/default'.
"""
create_db_folders(db_path)
fetch_database(db_out_path=db_path)
fetch_database(db_out_path=db_path)
Loading

0 comments on commit 0a6dd4a

Please sign in to comment.