Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
saramsey committed Oct 19, 2023
1 parent c9dd387 commit 63e0449
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 51 deletions.
83 changes: 42 additions & 41 deletions code/ARAX/ARAXQuery/ARAX_database_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# NOTE: this module is only to be used either as a CLI script or in the
# __main__.py Flask application at application start-up. Please do not
# instantiate this class and call `check_databases` at query time. -SAR
# instantiate this class and call `update_databases` at query time. -SAR

import os
import sys
Expand All @@ -22,9 +22,9 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs)


class ARAXDatabaseManager:
def __init__(self):
def __init__(self, allow_downloads=False):
self.RTXConfig = RTXConfiguration()

self.allow_downloads = allow_downloads
pathlist = os.path.realpath(__file__).split(os.path.sep)
RTXindex = pathlist.index("RTX")

Expand Down Expand Up @@ -209,10 +209,12 @@ def __init__(self):
}

def update_databases(self, debug = True, response = None):
if not self.allow_downloads:
raise ValueError("in ARAXDatabaseManager, update_databases called with self.allow_downloads=False")
debug = True
# First ensure we have a db versions file if we're in a docker container (since host has dbs predownloaded)
if os.path.exists(self.docker_databases_dir_path) and not os.path.exists(versions_path):
self.write_db_versions_file(debug=True)
self._write_db_versions_file(debug=True)

# Then ensure each database/symlink is up to date
if os.path.exists(versions_path):
Expand All @@ -222,7 +224,7 @@ def update_databases(self, debug = True, response = None):
# Download databases to a persistent central location if this is a docker instance (like arax.ncats.io)
if os.path.exists(self.docker_databases_dir_path):
eprint(f"Downloading any missing databases from arax-databases.rtx.ai to {self.docker_databases_dir_path}")
self.download_to_mnt(debug=debug, skip_if_exists=True, remove_unused=False)
self._download_to_mnt(debug=debug, skip_if_exists=True, remove_unused=False)

# Check that each database exists locally (or a symlink to it does, in the case of a docker host machine)
for database_name, local_path in self.local_paths.items(): # iterate through all databases
Expand All @@ -231,20 +233,20 @@ def update_databases(self, debug = True, response = None):
eprint(f"{database_name} ({local_path}) not present locally, downloading or symlinking now...")
if response is not None:
response.debug(f"Updating the local file for {database_name}...")
self.download_database(remote_location=self.remote_locations[database_name],
local_destination_path=self.local_paths[database_name],
local_symlink_target_path=self.docker_central_paths[database_name],
debug=debug)
self._download_database(remote_location=self.remote_locations[database_name],
local_destination_path=self.local_paths[database_name],
local_symlink_target_path=self.docker_central_paths[database_name],
debug=debug)
elif local_versions[database_name]['version'] != self.db_versions[database_name]['version']: # If database is present but wrong version
if debug:
eprint(f"{database_name} has a local version, '{local_versions[database_name]['version']}', which does not match the remote version, '{self.db_versions[database_name]['version']}'.")
eprint("downloading remote version...")
if response is not None:
response.debug(f"Updating the local file for {database_name}...")
self.download_database(remote_location=self.remote_locations[database_name],
local_destination_path=self.local_paths[database_name],
local_symlink_target_path=self.docker_central_paths[database_name],
debug=debug)
self._download_database(remote_location=self.remote_locations[database_name],
local_destination_path=self.local_paths[database_name],
local_symlink_target_path=self.docker_central_paths[database_name],
debug=debug)
if os.path.exists(self.local_paths[database_name]): # check that download worked if so remove old version
if debug:
eprint("Download successful. Removing local version...")
Expand All @@ -261,18 +263,21 @@ def update_databases(self, debug = True, response = None):
eprint(f"{database_name} ({self.local_paths[database_name]}) not present locally, downloading or symlinking now......")
if response is not None:
response.debug(f"Updating the local file for {database_name}...")
self.download_database(remote_location=self.remote_locations[database_name], local_destination_path=self.local_paths[database_name], local_symlink_target_path=self.docker_central_paths[database_name], debug=debug)
self._download_database(remote_location=self.remote_locations[database_name],
local_destination_path=self.local_paths[database_name],
local_symlink_target_path=self.docker_central_paths[database_name],
debug=debug)
else:
if debug:
eprint(f"Local version of {database_name} ({local_path}) matches the remote version, skipping...")
self.write_db_versions_file()
self._write_db_versions_file()
else: # If database manager has never been run download all databases
if debug:
eprint("No local verson json file present. Downloading all databases...")
if response is not None:
response.debug(f"No local verson json file present. Downloading all databases...")
self.force_download_all(debug=debug)
self.write_db_versions_file()
self._force_download_all(debug=debug)
self._write_db_versions_file()
return response

@staticmethod
Expand Down Expand Up @@ -330,7 +335,7 @@ def check_date(self, file_path, max_days = 31):
else:
return True

def download_database(self, remote_location, local_destination_path, local_symlink_target_path, debug=False):
def _download_database(self, remote_location, local_destination_path, local_symlink_target_path, debug=False):
if local_symlink_target_path is not None and os.path.exists(local_symlink_target_path): # if on the server symlink instead of downloading
self.symlink_database(symlink_path=local_destination_path, target_path=local_symlink_target_path)
else:
Expand All @@ -346,7 +351,7 @@ def rsync_database(self, remote_location, local_path, debug=False):
#os.system(f"rsync -Lhzc{verbose} --progress {remote_location} {local_path}")
eprint(f"ERROR: Wanted to run the following rsync, but it isn't going to work anyway. Skipping: rsync -Lhzc{verbose} --progress {remote_location} {local_path}")

def download_to_mnt(self, debug=False, skip_if_exists=False, remove_unused=False):
def _download_to_mnt(self, debug=False, skip_if_exists=False, remove_unused=False):
"""
This method downloads databases to the docker host machine in a central location.
"""
Expand All @@ -355,7 +360,7 @@ def download_to_mnt(self, debug=False, skip_if_exists=False, remove_unused=False
for database_name in self.remote_locations.keys():
database_dir = os.path.sep.join(self.docker_central_paths[database_name].split('/')[:-1])
if debug:
print(f"On database {database_name} in download_to_mnt()")
print(f"On database {database_name} in _download_to_mnt()")
if not os.path.exists(database_dir):
if debug:
print(f"Creating directory {database_dir}...")
Expand All @@ -366,18 +371,21 @@ def download_to_mnt(self, debug=False, skip_if_exists=False, remove_unused=False
if debug:
print(f"Initiating download from location {remote_location}; "
f"saving to {docker_host_local_path}")
self.download_database(remote_location=remote_location,
local_destination_path=docker_host_local_path,
local_symlink_target_path=None,
debug=debug)
self._download_database(remote_location=remote_location,
local_destination_path=docker_host_local_path,
local_symlink_target_path=None,
debug=debug)
else:
print(f" Database already exists, no need to download") if debug else None
def force_download_all(self, debug=False):

def _force_download_all(self, debug=False):
for database_name in self.remote_locations.keys():
if debug:
print(f"Downloading {self.remote_locations[database_name].split('/')[-1]}...")
self.download_database(remote_location=self.remote_locations[database_name], local_destination_path=self.local_paths[database_name], local_symlink_target_path=self.docker_central_paths[database_name], debug=debug)
self._download_database(remote_location=self.remote_locations[database_name],
local_destination_path=self.local_paths[database_name],
local_symlink_target_path=self.docker_central_paths[database_name],
debug=debug)

def check_all(self, max_days=31, debug=False):
update_flag = False
Expand All @@ -402,14 +410,7 @@ def check_all(self, max_days=31, debug=False):
return True
return update_flag

def update_databases_by_date(self, max_days=31, debug=False):
for database_name, local_path in self.local_paths.items():
if self.check_date(local_path, max_days=max_days):
if debug:
print(f"{database_name} not present or older than {max_days} days. Updating file...")
self.download_database(remote_location=self.remote_locations[database_name], local_destination_path=local_path, local_symlink_target_path=self.docker_central_paths[database_name], debug=debug)

def write_db_versions_file(self, debug=False):
def _write_db_versions_file(self, debug=False):
print(f"saving new version file to {versions_path}") if debug else None
with open(versions_path, "w") as fid:
json.dump(self.db_versions, fid)
Expand Down Expand Up @@ -441,7 +442,7 @@ def main():
parser.add_argument("-r", "--remove_unused", action='store_true', dest='remove_unused', required=False, help="for -m mode only, remove database files under /mnt databases directory that are NOT used in config_dbs.json")

arguments = parser.parse_args()
DBManager = ARAXDatabaseManager()
DBManager = ARAXDatabaseManager(allow_downloads=True)

print(f"Local paths:")
for db_name, path in DBManager.local_paths.items():
Expand All @@ -457,13 +458,13 @@ def main():
if not DBManager.check_versions(debug=True):
print("All local versions are up to date")
elif arguments.force_download:
DBManager.force_download_all(debug=True)
DBManager._force_download_all(debug=True)
elif arguments.mnt:
DBManager.download_to_mnt(debug=True,
skip_if_exists=arguments.skip_if_exists,
remove_unused=arguments.remove_unused)
DBManager._download_to_mnt(debug=True,
skip_if_exists=arguments.skip_if_exists,
remove_unused=arguments.remove_unused)
elif arguments.generate_versions_file:
DBManager.write_db_versions_file(debug=True)
DBManager._write_db_versions_file(debug=True)
else:
DBManager.update_databases(debug=True)

Expand Down
10 changes: 2 additions & 8 deletions code/ARAX/NodeSynonymizer/node_synonymizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,9 @@ def __init__(self):
self.sri_nn_infores_curie = "infores:sri-node-normalizer"
self.arax_infores_curie = "infores:arax"

# If the database doesn't seem to exist, try running the DatabaseManager
if not pathlib.Path(self.database_path).exists():
print(f"Synonymizer not present at {self.database_path}; attempting to download with database manager..")
db_manager = ARAXDatabaseManager()
db_manager.update_databases()

if not pathlib.Path(self.database_path).exists():
raise ValueError(f"Synonymizer specified in config_dbs file does not exist locally, even after "
f"running the database manager! It should be at: {self.database_path}")
raise ValueError(f"Synonymizer specified in config_dbs file does not exist locally."
f" It should be at: {self.database_path}")
else:
self.db_connection = sqlite3.connect(self.database_path)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def main():

RTXConfiguration()

dbmanager = ARAXDatabaseManager()
dbmanager = ARAXDatabaseManager(allow_downloads=True)
try:
logging.info("Checking for complete databases")
if dbmanager.check_versions():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def main():

RTXConfiguration()

dbmanager = ARAXDatabaseManager()
dbmanager = ARAXDatabaseManager(allow_downloads=True)
try:
logging.info("Checking for complete databases")
if dbmanager.check_versions():
Expand Down

0 comments on commit 63e0449

Please sign in to comment.