Skip to content

Commit

Permalink
docs: improve error messages for failed queries
Browse files Browse the repository at this point in the history
  • Loading branch information
DaniBodor committed Sep 4, 2024
1 parent 842b563 commit fa20cb2
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 10 deletions.
26 changes: 18 additions & 8 deletions deeprank2/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import deeprank2.features
from deeprank2.domain.aminoacidlist import convert_aa_nomenclature
from deeprank2.features import components, conservation, contact
from deeprank2.molstruct.residue import Residue, SingleResidueVariant
from deeprank2.molstruct.residue import SingleResidueVariant
from deeprank2.utils.buildgraph import get_contact_atoms, get_structure, get_surrounding_residues
from deeprank2.utils.graph import Graph
from deeprank2.utils.grid import Augmentation, GridSettings, MapMethod
Expand Down Expand Up @@ -265,12 +265,11 @@ def _build_helper(self) -> Graph:
structure = self._load_structure()

# find the variant residue and its surroundings
variant_residue: Residue = None
for residue in structure.get_chain(self.variant_chain_id).residues:
if residue.number == self.variant_residue_number and residue.insertion_code == self.insertion_code:
variant_residue = residue
break
if variant_residue is None:
else: # if break is not reached
msg = f"Residue not found in {self.pdb_path}: {self.variant_chain_id} {self.residue_id}"
raise ValueError(msg)
self.variant = SingleResidueVariant(variant_residue, self.variant_amino_acid)
Expand Down Expand Up @@ -453,7 +452,7 @@ def __iter__(self) -> Iterator[Query]:
def __len__(self) -> int:
return len(self._queries)

def _process_one_query(self, query: Query) -> None:
def _process_one_query(self, query: Query, log_error_traceback: bool = False) -> None:
"""Only one process may access an hdf5 file at a time."""
try:
output_path = f"{self._prefix}-{os.getpid()}.hdf5"
Expand All @@ -479,10 +478,12 @@ def _process_one_query(self, query: Query) -> None:

except (ValueError, AttributeError, KeyError, TimeoutError) as e:
_log.warning(
f"\nGraph/Query with ID {query.get_query_id()} ran into an Exception ({e.__class__.__name__}: {e}),"
" and it has not been written to the hdf5 file. More details below:",
f"Graph/Query with ID {query.get_query_id()} ran into an Exception and was not written to the hdf5 file.\n"
f"Exception found: {e.__class__.__name__}: {e}.\n"
"You may proceed with your analysis, but this query will be ignored.\n",
)
_log.exception(e)
if log_error_traceback:
_log.exception(f"----Full error traceback:----\n{e}")

def process(
self,
Expand All @@ -493,6 +494,7 @@ def process(
grid_settings: GridSettings | None = None,
grid_map_method: MapMethod | None = None,
grid_augmentation_count: int = 0,
log_error_traceback: bool = False,
) -> list[str]:
"""Render queries into graphs (and optionally grids).
Expand All @@ -510,6 +512,8 @@ def process(
grid_settings: If valid together with `grid_map_method`, the grid data will be stored as well. Defaults to None.
grid_map_method: If valid together with `grid_settings`, the grid data will be stored as well. Defaults to None.
grid_augmentation_count: Number of grid data augmentations (must be >= 0). Defaults to 0.
log_error_traceback: If True, logs the full error traceback in case a query fails. Otherwise, only the exception type and message are logged.
Defaults to False.
Returns:
The list of paths of the generated HDF5 files.
Expand All @@ -536,7 +540,7 @@ def process(
self._grid_augmentation_count = grid_augmentation_count

_log.info(f"Creating pool function to process {len(self)} queries...")
pool_function = partial(self._process_one_query)
pool_function = partial(self._process_one_query, log_error_traceback=log_error_traceback)
with Pool(self._cpu_count) as pool:
_log.info("Starting pooling...\n")
pool.map(pool_function, self.queries)
Expand All @@ -551,6 +555,12 @@ def process(
os.remove(output_path)
return glob(f"{prefix}.hdf5")

if not output_paths:
msg = "No queries have been processed."
raise ValueError(msg)
if (n_processed := len(output_paths)) != (n_total := len(self.queries)):
_log.warning(f"Not all queries have been processed. You can proceed with the analysis of {n_processed}/{n_total} queries.")

return output_paths

def _set_feature_modules(self, feature_modules: list[ModuleType, str] | ModuleType | str) -> list[str]:
Expand Down
4 changes: 2 additions & 2 deletions tutorials/data_generation_srv.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -98,7 +98,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down

0 comments on commit fa20cb2

Please sign in to comment.