Skip to content

Commit

Permalink
logic phenotype tests now running except for inverse which is not imp…
Browse files Browse the repository at this point in the history
…lemented yet
  • Loading branch information
a-hartens committed Nov 14, 2024
1 parent 287cf49 commit 6c2b507
Show file tree
Hide file tree
Showing 4 changed files with 124 additions and 36 deletions.
76 changes: 71 additions & 5 deletions phenex/phenotypes/computation_graph_phenotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,15 @@ def __init__(
expression: ComputationGraph,
return_date: Union[str, Phenotype],
name: str = None,
aggregation_index=["PERSON_ID"],
_operate_on: str = "boolean",
_populate: str = "value",
_reduce: bool = False,
):
super(ComputationGraphPhenotype, self).__init__()
self.computation_graph = expression
self.return_date = return_date
self.aggregation_index = aggregation_index
self._name = name
self._operate_on = _operate_on
self._populate = _populate
Expand Down Expand Up @@ -74,16 +76,32 @@ def _execute(self, tables: Dict[str, Table]) -> PhenotypeTable:
_expression = self.computation_graph.get_value_expression(
joined_table, operate_on=self._operate_on
)
joined_table = joined_table.mutate(VALUE=_expression).mutate(
EVENT_DATE=ibis.null(date)
)
joined_table = joined_table.mutate(VALUE=_expression)
elif self._populate == "boolean":
_expression = self.computation_graph.get_boolean_expression(
joined_table, operate_on=self._operate_on
)
joined_table = joined_table.mutate(BOOLEAN=_expression).mutate(
EVENT_DATE=ibis.null(date)
joined_table = joined_table.mutate(BOOLEAN=_expression)

# Return the first or last event date
ibis.options.interactive = True
date_columns = self._coalesce_all_date_columns(joined_table)
if self.return_date == "first":
joined_table = joined_table.mutate(
EVENT_DATE=ibis.least(*date_columns)
)
elif self.return_date == "last":
joined_table = joined_table.mutate(
EVENT_DATE=ibis.greatest(*date_columns)
)
elif self.return_date == 'all':
joined_table = self._return_all_dates(joined_table, date_columns)
elif isinstance(self.return_date, Phenotype):
joined_table = joined_table.mutate(
EVENT_DATE=getattr(joined_table,f"{self.return_date.name}_EVENT_DATE")
)
else:
joined_table = joined_table.mutate(EVENT_DATE=ibis.null(date))

# Reduce the table to only include rows where the boolean column is True
if self._reduce:
Expand All @@ -96,6 +114,54 @@ def _execute(self, tables: Dict[str, Table]) -> PhenotypeTable:

return joined_table

def _return_all_dates(self, table, date_columns):
    """
    Stack every non-null date from the given date columns into one EVENT_DATE column.

    Used when return_date == "all". For each entry of `date_columns`, the rows of
    `table` where that entry is not null are kept and the entry's value is written
    to EVENT_DATE; the per-column results are then combined with successive unions.

    Args:
        table: The Ibis table object (e.g., joined_table) that contains all leaf phenotypes stacked horizontally
        date_columns: List of date columns as ibis objects; must contain at least one entry (an empty list raises IndexError)
    Returns:
        Ibis table expression: the union of the per-column filtered tables, each carrying an EVENT_DATE column.
    """
    # NOTE(review): the visible caller passes the output of
    # _coalesce_all_date_columns here, i.e. coalesced expressions rather than
    # raw columns -- confirm that this yields the intended set of dates.

    # one filtered copy of the table per date column, with that column exposed as EVENT_DATE
    per_column_tables = [
        table.filter(column.notnull()).mutate(EVENT_DATE=column)
        for column in date_columns
    ]

    # fold the per-column tables together, left to right
    stacked = per_column_tables[0]
    for extra in per_column_tables[1:]:
        stacked = stacked.union(extra)
    return stacked

def _coalesce_all_date_columns(self, table):
    """
    Build one COALESCE expression per EVENT_DATE-like column of `table`.

    Every column whose name contains "EVENT_DATE" is collected. For each position i
    the collected names are rotated so the list starts at column i, and the rotated
    columns are passed to ibis.coalesce. The result therefore holds as many
    expressions as there are matching columns, each trying the columns in a
    different starting order.

    Args:
        table: The Ibis table object (e.g., joined_table).
    Returns:
        List of Ibis COALESCE expressions, one per matching column (empty list if no column matches).
    """
    date_column_names = [name for name in table.columns if "EVENT_DATE" in name]

    def rotated_coalesce(offset):
        # start at `offset`, wrap around to the beginning
        ordering = date_column_names[offset:] + date_column_names[:offset]
        return ibis.coalesce(*(getattr(table, name) for name in ordering))

    # NOTE(review): presumably the rotation exists so that LEAST/GREATEST in the
    # caller never receive a NULL argument unless every date is NULL -- confirm.
    return [rotated_coalesce(offset) for offset in range(len(date_column_names))]


class ScorePhenotype(ComputationGraphPhenotype):
"""
Expand Down
9 changes: 7 additions & 2 deletions phenex/test/phenotype_test_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class PhenotypeTestGenerator:
name_space = ""
date_format = "%m-%d-%Y"
test_values = False
test_date = False
join_on = ["PERSON_ID"]

def run_tests(self, verbose=False):
self.verbose = verbose
Expand Down Expand Up @@ -77,7 +79,7 @@ def df_from_test_info(test_info):
df["PERSON_ID"] = test_info["persons"]

columnname_boolean = "boolean"
columnname_date = "DATE"
columnname_date = "EVENT_DATE"
columnname_value = "VALUE"

df[columnname_boolean] = True
Expand Down Expand Up @@ -126,13 +128,16 @@ def df_from_test_info(test_info):
self.name_output_file(test_info), df
)

join_on = ["PERSON_ID"]
join_on = ['PERSON_ID']
if self.test_values:
join_on.append("VALUE")
if self.test_date:
join_on.append("EVENT_DATE")
check_equality(
result_table,
expected_output_table,
test_name=test_info["name"],
test_values=self.test_values,
test_date = self.test_date,
join_on=join_on
)
58 changes: 34 additions & 24 deletions phenex/test/phenotypes/test_logic_phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,7 @@ def define_phenotype_tests(self):

class LogicPhenotypeReturnDateLastTestGenerator(PhenotypeTestGenerator):
name_space = "lgpt_returndate_last"
test_date = True

def define_input_tables(self):
"""
Expand Down Expand Up @@ -179,7 +180,7 @@ def define_input_tables(self):
"01-01-2022", # P7 c3 11
]
]
df["event_date"] = self.event_dates
df["EVENT_DATE"] = self.event_dates

df_person = pd.DataFrame()
df_person["PERSON_ID"] = list(df["PERSON_ID"].unique())
Expand Down Expand Up @@ -323,6 +324,8 @@ def define_phenotype_tests(self):

class LogicPhenotypeReturnDateAllTestGenerator(PhenotypeTestGenerator):
name_space = "lgpt_returndate_all"
test_date = True
join_on = ["PERSON_ID", "EVENT_DATE"]

def define_input_tables(self):
"""
Expand Down Expand Up @@ -369,7 +372,7 @@ def define_input_tables(self):
"01-01-2022", # P7 c3 11
]
]
df["event_date"] = self.event_dates
df["EVENT_DATE"] = self.event_dates

df_person = pd.DataFrame()
df_person["PERSON_ID"] = list(df["PERSON_ID"].unique())
Expand Down Expand Up @@ -502,12 +505,12 @@ def define_phenotype_tests(self):

test_infos = [
c1andc2,
# c1orc2,
# c1andc3,
# c1andc2orc1andc3,
# c1andc2andc1andc3,
# c1andc2orc3,
# c1andc2andc3,
c1orc2,
c1andc3,
c1andc2orc1andc3,
c1andc2andc1andc3,
c1andc2orc3,
c1andc2andc3,
]

for test_info in test_infos:
Expand All @@ -520,6 +523,7 @@ class LogicPhenotypeInverseReturnDateLastTestGenerator(
LogicPhenotypeReturnDateLastTestGenerator
):
name_space = "lgpt_inverse_returndate_last"
test_date = True

def define_phenotype_tests(self):
codelist_factory = LocalCSVCodelistFactory(
Expand Down Expand Up @@ -672,7 +676,7 @@ def define_phenotype_tests(self):

class LogicPhenotypeReturnDateFirstTestGenerator(PhenotypeTestGenerator):
name_space = "lgpt_returndate_first"

test_date = True
def define_input_tables(self):
"""
P1,c1,01-01-2022 0
Expand Down Expand Up @@ -718,7 +722,7 @@ def define_input_tables(self):
"01-01-2022", # P7 c3 11
]
]
df["event_date"] = self.event_dates
df["EVENT_DATE"] = self.event_dates

df_person = pd.DataFrame()
df_person["PERSON_ID"] = list(df["PERSON_ID"].unique())
Expand Down Expand Up @@ -860,25 +864,31 @@ def define_phenotype_tests(self):
return test_infos


def test_logic_phenotype():
import ibis
def test_logic_phenotype_1():
spg = LogicPhenotypeTestGenerator()
# spg.con = ibis.duckdb.connect()

# spg.run_tests()
spg.run_tests()

# spg = LogicPhenotypeReturnDateLastTestGenerator()
# spg.generate()
def test_logic_phenotype_2():
spg = LogicPhenotypeReturnDateLastTestGenerator()
spg.run_tests()

# spg = LogicPhenotypeInverseReturnDateLastTestGenerator()
# spg.generate()
def test_logic_phenotype_3():
spg = LogicPhenotypeReturnDateAllTestGenerator()
spg.run_tests()

# spg = LogicPhenotypeReturnDateAllTestGenerator()
# spg.generate()
def test_logic_phenotype_4():
spg = LogicPhenotypeReturnDateFirstTestGenerator()
spg.run_tests()

# spg = LogicPhenotypeReturnDateFirstTestGenerator()
# spg.generate()
def test_logic_phenotype_5():
pass
# spg = LogicPhenotypeInverseReturnDateLastTestGenerator()
# spg.run_tests()


if __name__ == "__main__":
test_logic_phenotype()
test_logic_phenotype_1()
test_logic_phenotype_2()
test_logic_phenotype_3()
test_logic_phenotype_4()
test_logic_phenotype_5()
17 changes: 12 additions & 5 deletions phenex/test/util/check_equality.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


def check_equality(
result, expected, join_on=["PERSON_ID"], test_name="test", test_values=False
result, expected, join_on=["PERSON_ID"], test_name="test", test_values=False, test_date=False
):
result = result.to_pandas()
result.loc[:, "DUMMY"] = 1
Expand All @@ -21,16 +21,23 @@ def check_equality(
), f"Expected not found in test {test_name}: {expected_not_found['PERSON_ID'].values}"

if test_values and 'VALUE' not in join_on:
print(full_results)
values_match = full_results["VALUE_result"] == full_results["VALUE_expected"]
print(values_match)
assert (
values_match.all()
), f"Found unexpected in test {test_name} : not all pairs match"
elif test_values and 'VALUE' in join_on:
print(full_results)
values_match = full_results["DUMMY_result"] == full_results["DUMMY_expected"]
print(values_match)
assert (
values_match.all()
), f"Found unexpected in test {test_name} : not all pairs match"

if test_date and 'EVENT_DATE' not in join_on:
dates_match = full_results["EVENT_DATE_result"] == full_results["EVENT_DATE_expected"]
assert (
dates_match.all()
), f"Found unexpected in test {test_name} : not all pairs match"
elif test_date and 'EVENT_DATE' in join_on:
dates_match = full_results["DUMMY_result"] == full_results["DUMMY_expected"]
assert (
dates_match.all()
), f"Found unexpected in test {test_name} : not all pairs match"

0 comments on commit 6c2b507

Please sign in to comment.