diff --git a/code/ARAX/ARAXQuery/ARAX_database_manager.py b/code/ARAX/ARAXQuery/ARAX_database_manager.py index c219ed231..b2d5a146e 100644 --- a/code/ARAX/ARAXQuery/ARAX_database_manager.py +++ b/code/ARAX/ARAXQuery/ARAX_database_manager.py @@ -2,7 +2,7 @@ # NOTE: this module is only to be used either as a CLI script or in the # __main__.py Flask application at application start-up. Please do not -# instantiate this class and call `check_databases` at query time. -SAR +# instantiate this class and call `update_databases` at query time. -SAR import os import sys @@ -22,9 +22,9 @@ def eprint(*args, **kwargs): print(*args, file=sys.stderr, **kwargs) class ARAXDatabaseManager: - def __init__(self): + def __init__(self, allow_downloads=False): self.RTXConfig = RTXConfiguration() - + self.allow_downloads = allow_downloads pathlist = os.path.realpath(__file__).split(os.path.sep) RTXindex = pathlist.index("RTX") @@ -209,10 +209,12 @@ def __init__(self): } def update_databases(self, debug = True, response = None): + if not self.allow_downloads: + raise ValueError("in ARAXDatabaseManager, update_databases called with self.allow_downloads=False") debug = True # First ensure we have a db versions file if we're in a docker container (since host has dbs predownloaded) if os.path.exists(self.docker_databases_dir_path) and not os.path.exists(versions_path): - self.write_db_versions_file(debug=True) + self._write_db_versions_file(debug=True) # Then ensure each database/symlink is up to date if os.path.exists(versions_path): @@ -222,7 +224,7 @@ def update_databases(self, debug = True, response = None): # Download databases to a persistent central location if this is a docker instance (like arax.ncats.io) if os.path.exists(self.docker_databases_dir_path): eprint(f"Downloading any missing databases from arax-databases.rtx.ai to {self.docker_databases_dir_path}") - self.download_to_mnt(debug=debug, skip_if_exists=True, remove_unused=False) + self._download_to_mnt(debug=debug, skip_if_exists=True, remove_unused=False) # Check that each database exists locally (or a symlink to it does, in the case of a docker host machine) for database_name, local_path in self.local_paths.items(): # iterate through all databases @@ -231,20 +233,20 @@ def update_databases(self, debug = True, response = None): eprint(f"{database_name} ({local_path}) not present locally, downloading or symlinking now...") if response is not None: response.debug(f"Updating the local file for {database_name}...") - self.download_database(remote_location=self.remote_locations[database_name], - local_destination_path=self.local_paths[database_name], - local_symlink_target_path=self.docker_central_paths[database_name], - debug=debug) + self._download_database(remote_location=self.remote_locations[database_name], + local_destination_path=self.local_paths[database_name], + local_symlink_target_path=self.docker_central_paths[database_name], + debug=debug) elif local_versions[database_name]['version'] != self.db_versions[database_name]['version']: # If database is present but wrong version if debug: eprint(f"{database_name} has a local version, '{local_versions[database_name]['version']}', which does not match the remote version, '{self.db_versions[database_name]['version']}'.") eprint("downloading remote version...") if response is not None: response.debug(f"Updating the local file for {database_name}...") - self.download_database(remote_location=self.remote_locations[database_name], - local_destination_path=self.local_paths[database_name], - local_symlink_target_path=self.docker_central_paths[database_name], - debug=debug) + self._download_database(remote_location=self.remote_locations[database_name], + local_destination_path=self.local_paths[database_name], + local_symlink_target_path=self.docker_central_paths[database_name], + debug=debug) if os.path.exists(self.local_paths[database_name]): # check that download worked if so remove old version if debug: eprint("Download successful. Removing local version...") @@ -261,18 +263,21 @@ def update_databases(self, debug = True, response = None): eprint(f"{database_name} ({self.local_paths[database_name]}) not present locally, downloading or symlinking now......") if response is not None: response.debug(f"Updating the local file for {database_name}...") - self.download_database(remote_location=self.remote_locations[database_name], local_destination_path=self.local_paths[database_name], local_symlink_target_path=self.docker_central_paths[database_name], debug=debug) + self._download_database(remote_location=self.remote_locations[database_name], + local_destination_path=self.local_paths[database_name], + local_symlink_target_path=self.docker_central_paths[database_name], + debug=debug) else: if debug: eprint(f"Local version of {database_name} ({local_path}) matches the remote version, skipping...") - self.write_db_versions_file() + self._write_db_versions_file() else: # If database manager has never been run download all databases if debug: eprint("No local verson json file present. Downloading all databases...") if response is not None: response.debug(f"No local verson json file present. Downloading all databases...") - self.force_download_all(debug=debug) - self.write_db_versions_file() + self._force_download_all(debug=debug) + self._write_db_versions_file() return response @staticmethod @@ -330,7 +335,7 @@ def check_date(self, file_path, max_days = 31): else: return True - def download_database(self, remote_location, local_destination_path, local_symlink_target_path, debug=False): + def _download_database(self, remote_location, local_destination_path, local_symlink_target_path, debug=False): if local_symlink_target_path is not None and os.path.exists(local_symlink_target_path): # if on the server symlink instead of downloading self.symlink_database(symlink_path=local_destination_path, target_path=local_symlink_target_path) else: @@ -346,7 +351,7 @@ def rsync_database(self, remote_location, local_path, debug=False): #os.system(f"rsync -Lhzc{verbose} --progress {remote_location} {local_path}") eprint(f"ERROR: Wanted to run the following rsync, but it isn't going to work anyway. Skipping: rsync -Lhzc{verbose} --progress {remote_location} {local_path}") - def download_to_mnt(self, debug=False, skip_if_exists=False, remove_unused=False): + def _download_to_mnt(self, debug=False, skip_if_exists=False, remove_unused=False): """ This method downloads databases to the docker host machine in a central location. """ @@ -355,7 +360,7 @@ def download_to_mnt(self, debug=False, skip_if_exists=False, remove_unused=False for database_name in self.remote_locations.keys(): database_dir = os.path.sep.join(self.docker_central_paths[database_name].split('/')[:-1]) if debug: - print(f"On database {database_name} in download_to_mnt()") + print(f"On database {database_name} in _download_to_mnt()") if not os.path.exists(database_dir): if debug: print(f"Creating directory {database_dir}...") @@ -366,18 +371,21 @@ def download_to_mnt(self, debug=False, skip_if_exists=False, remove_unused=False if debug: print(f"Initiating download from location {remote_location}; " f"saving to {docker_host_local_path}") - self.download_database(remote_location=remote_location, - local_destination_path=docker_host_local_path, - local_symlink_target_path=None, - debug=debug) + self._download_database(remote_location=remote_location, + local_destination_path=docker_host_local_path, + local_symlink_target_path=None, + debug=debug) else: print(f" Database already exists, no need to download") if debug else None - - def force_download_all(self, debug=False): + + def _force_download_all(self, debug=False): for database_name in self.remote_locations.keys(): if debug: print(f"Downloading {self.remote_locations[database_name].split('/')[-1]}...") - self.download_database(remote_location=self.remote_locations[database_name], local_destination_path=self.local_paths[database_name], local_symlink_target_path=self.docker_central_paths[database_name], debug=debug) + self._download_database(remote_location=self.remote_locations[database_name], + local_destination_path=self.local_paths[database_name], + local_symlink_target_path=self.docker_central_paths[database_name], + debug=debug) def check_all(self, max_days=31, debug=False): update_flag = False @@ -402,14 +410,7 @@ def check_all(self, max_days=31, debug=False): return True return update_flag - def update_databases_by_date(self, max_days=31, debug=False): - for database_name, local_path in self.local_paths.items(): - if self.check_date(local_path, max_days=max_days): - if debug: - print(f"{database_name} not present or older than {max_days} days. Updating file...") - self.download_database(remote_location=self.remote_locations[database_name], local_destination_path=local_path, local_symlink_target_path=self.docker_central_paths[database_name], debug=debug) - - def write_db_versions_file(self, debug=False): + def _write_db_versions_file(self, debug=False): print(f"saving new version file to {versions_path}") if debug else None with open(versions_path, "w") as fid: json.dump(self.db_versions, fid) @@ -441,7 +442,7 @@ def main(): parser.add_argument("-r", "--remove_unused", action='store_true', dest='remove_unused', required=False, help="for -m mode only, remove database files under /mnt databases directory that are NOT used in config_dbs.json") arguments = parser.parse_args() - DBManager = ARAXDatabaseManager() + DBManager = ARAXDatabaseManager(allow_downloads=True) print(f"Local paths:") for db_name, path in DBManager.local_paths.items(): @@ -457,13 +458,13 @@ def main(): if not DBManager.check_versions(debug=True): print("All local versions are up to date") elif arguments.force_download: - DBManager.force_download_all(debug=True) + DBManager._force_download_all(debug=True) elif arguments.mnt: - DBManager.download_to_mnt(debug=True, - skip_if_exists=arguments.skip_if_exists, - remove_unused=arguments.remove_unused) + DBManager._download_to_mnt(debug=True, + skip_if_exists=arguments.skip_if_exists, + remove_unused=arguments.remove_unused) elif arguments.generate_versions_file: - DBManager.write_db_versions_file(debug=True) + DBManager._write_db_versions_file(debug=True) else: DBManager.update_databases(debug=True) diff --git a/code/ARAX/NodeSynonymizer/node_synonymizer.py b/code/ARAX/NodeSynonymizer/node_synonymizer.py index 25ce89f3a..0353d3ee7 100644 --- a/code/ARAX/NodeSynonymizer/node_synonymizer.py +++ b/code/ARAX/NodeSynonymizer/node_synonymizer.py @@ -40,15 +40,9 @@ def __init__(self): self.sri_nn_infores_curie = "infores:sri-node-normalizer" self.arax_infores_curie = "infores:arax" - # If the database doesn't seem to exist, try running the DatabaseManager if not pathlib.Path(self.database_path).exists(): - print(f"Synonymizer not present at {self.database_path}; attempting to download with database manager..") - db_manager = ARAXDatabaseManager() - db_manager.update_databases() - - if not pathlib.Path(self.database_path).exists(): - raise ValueError(f"Synonymizer specified in config_dbs file does not exist locally, even after " - f"running the database manager! It should be at: {self.database_path}") + raise ValueError(f"Synonymizer specified in config_dbs file does not exist locally." + f" It should be at: {self.database_path}") else: self.db_connection = sqlite3.connect(self.database_path) diff --git a/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py b/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py index 99807759a..3cfb193d0 100644 --- a/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py +++ b/code/UI/OpenAPI/python-flask-server/KG2/openapi_server/__main__.py @@ -86,7 +86,7 @@ def main(): RTXConfiguration() - dbmanager = ARAXDatabaseManager() + dbmanager = ARAXDatabaseManager(allow_downloads=True) try: logging.info("Checking for complete databases") if dbmanager.check_versions(): diff --git a/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py b/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py index 102d0ebc9..408e0046c 100644 --- a/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py +++ b/code/UI/OpenAPI/python-flask-server/openapi_server/__main__.py @@ -86,7 +86,7 @@ def main(): RTXConfiguration() - dbmanager = ARAXDatabaseManager() + dbmanager = ARAXDatabaseManager(allow_downloads=True) try: logging.info("Checking for complete databases") if dbmanager.check_versions():