diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 2473c74d..c5cf5eb9 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -228,11 +228,16 @@ class CheckValidPetabIdColumn(ValidationTask): """A task to check that a given column contains only valid PEtab IDs.""" def __init__( - self, table_name: str, column_name: str, required_column: bool = True + self, + table_name: str, + column_name: str, + required_column: bool = True, + ignore_nan: bool = False, ): self.table_name = table_name self.column_name = column_name self.required_column = required_column + self.ignore_nan = ignore_nan def run(self, problem: Problem) -> ValidationIssue | None: df = getattr(problem, f"{self.table_name}_df") @@ -248,7 +253,10 @@ def run(self, problem: Problem) -> ValidationIssue | None: return try: - check_ids(df[self.column_name].values, kind=self.column_name) + ids = df[self.column_name].values + if self.ignore_nan: + ids = ids[~pd.isna(ids)] + check_ids(ids, kind=self.column_name) except ValueError as e: return ValidationError(str(e)) @@ -308,21 +316,26 @@ def run(self, problem: Problem) -> ValidationIssue | None: except AssertionError as e: return ValidationError(str(e)) - # TODO: introduce some option for validation partial vs full + # TODO: introduce some option for validation of partial vs full # problem. 
if this is supposed to be a complete problem, a missing # condition table should be an error if the measurement table refers - # to conditions - - # check that measured experiments - if problem.experiment_df is None: - return - + # to conditions, otherwise it should at most be a warning used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values) - available_experiments = set( - problem.experiment_df[EXPERIMENT_ID].unique() + # handle default-experiment + used_experiments = set( + filter( + lambda x: not pd.isna(x), + used_experiments, + ) + ) + # check that measured experiments exist + available_experiments = ( + set(problem.experiment_df[EXPERIMENT_ID].unique()) + if problem.experiment_df is not None + else set() ) if missing_experiments := (used_experiments - available_experiments): - raise AssertionError( + return ValidationError( "Measurement table references experiments that " "are not specified in the experiments table: " + str(missing_experiments) @@ -826,6 +839,7 @@ def append_overrides(overrides): CheckMeasurementTable(), CheckConditionTable(), CheckExperimentTable(), + CheckValidPetabIdColumn("measurement", EXPERIMENT_ID, ignore_nan=True), CheckValidPetabIdColumn("experiment", EXPERIMENT_ID), CheckValidPetabIdColumn("experiment", CONDITION_ID), CheckExperimentConditionsExist(), diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 78304328..a5690882 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -9,14 +9,11 @@ import pandas as pd from pandas.io.common import get_handle, is_url -import petab.v1.C -from petab.models import MODEL_TYPE_SBML -from petab.v1 import Problem as ProblemV1 -from petab.yaml import get_path_prefix - from .. 
import v1, v2 -from ..v1.yaml import load_yaml, validate, write_yaml +from ..v1 import Problem as ProblemV1 +from ..v1.yaml import get_path_prefix, load_yaml, validate, write_yaml from ..versions import get_major_version +from .models import MODEL_TYPE_SBML __all__ = ["petab1to2"] @@ -63,18 +60,18 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): if get_major_version(yaml_config) != 1: raise ValueError("PEtab problem is not version 1.") petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config) + # get rid of conditionName column if present (unsupported in v2) + petab_problem.condition_df = petab_problem.condition_df.drop( + columns=[v1.C.CONDITION_NAME], errors="ignore" + ) if v1.lint_problem(petab_problem): raise ValueError("Provided PEtab problem does not pass linting.") + output_dir = Path(output_dir) + # Update YAML file new_yaml_config = _update_yaml(yaml_config) - # Write new YAML file - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - new_yaml_file = output_dir / Path(yaml_file).name - write_yaml(new_yaml_config, new_yaml_file) - # Update tables # condition tables, observable tables, SBML files, parameter table: # no changes - just copy @@ -104,6 +101,19 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: if not sim_cond_id and not preeq_cond_id: return "" + # check whether the conditions will exist in the v2 condition table + sim_cond_exists = ( + petab_problem.condition_df.loc[sim_cond_id].notna().any() + ) + preeq_cond_exists = ( + preeq_cond_id + and petab_problem.condition_df.loc[preeq_cond_id].notna().any() + ) + if not sim_cond_exists and not preeq_cond_exists: + # if we have only all-NaN conditions, we don't create a new + # experiment + return "" + if preeq_cond_id: preeq_cond_id = f"{preeq_cond_id}_" exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}" @@ -126,6 +136,8 @@ def 
create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID] preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "") exp_id = create_experiment_id(sim_cond_id, preeq_cond_id) + if not exp_id: + continue if preeq_cond_id: experiments.append( { @@ -165,10 +177,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: # add pre-eq condition id if not present or convert to string # for simplicity if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns: - measurement_df[ - v1.C.PREEQUILIBRATION_CONDITION_ID - ] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype( - str + measurement_df.fillna( + {v1.C.PREEQUILIBRATION_CONDITION_ID: ""}, inplace=True ) else: measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = "" @@ -177,7 +187,7 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: petab_problem.condition_df is not None and len( set(petab_problem.condition_df.columns) - - {petab.v1.C.CONDITION_NAME} + - {v1.C.CONDITION_NAME} ) == 0 ): @@ -209,6 +219,10 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: measurement_df, get_dest_path(measurement_file) ) + # Write new YAML file + new_yaml_file = output_dir / Path(yaml_file).name + write_yaml(new_yaml_config, new_yaml_file) + # validate updated Problem validation_issues = v2.lint_problem(new_yaml_file)