From bd52759972e35a079a9fcdc5d73db991dc763297 Mon Sep 17 00:00:00 2001 From: Anita Caron Date: Thu, 21 Sep 2023 11:07:43 +0100 Subject: [PATCH 1/4] check indirect relations for `connected to` and `develops from` --- src/template_generation_tools.py | 32 ++++++++++++++++++++++++++++++++ src/uberongraph_tools.py | 31 ++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/template_generation_tools.py b/src/template_generation_tools.py index 5dd49cfc7..38eb4f9b4 100644 --- a/src/template_generation_tools.py +++ b/src/template_generation_tools.py @@ -248,6 +248,22 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict): valid_ct_as_conn_to, terms_ct_as = ug.verify_relationship(terms_ct_as, ug.select_ct) records, valid_as, valid_ct = add_rows(records, valid_as, valid_ct, valid_conn_to.union(valid_ct_as_conn_to), 'connected_to') + + # INDIRECT CONNECTED TO CHECK + valid_conn_to_nr, _ = ug.verify_relationship(transform_to_str(valid_conn_to), ug.select_ct_nonredundant) + + terms_s, terms_o = split_terms(transform_to_str(valid_conn_to - valid_conn_to_nr)) + + rows_vctnr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))] + + # ADD RESULTS TO INDIRECT LOG + valid_error_log = pd.concat([valid_error_log, rows_vctnr]) + + for _, r in rows_vctnr.iterrows(): + if 'UBERON' in r['s'] and 'UBERON' in r['o']: + indirect_as.add((r['s'], r['o'])) + elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']): + indirect_ct.add((r['s'], r['o'])) # CONTINUOUS WITH CHECK valid_cont_with, terms_pairs = ug.verify_relationship(terms_pairs, ug.select_continuous_with) @@ -264,6 +280,22 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict): # DEVELOPS FROM CHECK valid_dev_from, terms_pairs = ug.verify_relationship(terms_pairs, ug.select_develops_from) records, valid_as, valid_ct = add_rows(records, valid_as, valid_ct, valid_dev_from, 'develops_from') + + # INDIRECT DEVELOPS FROM CHECK + valid_dev_nr, _ = ug.verify_relationship(transform_to_str(valid_dev_from), ug.select_dev_from_nonredundant) + print(valid_dev_from, valid_dev_nr) + terms_s, terms_o = split_terms(transform_to_str(valid_dev_from - valid_dev_nr)) + + rows_vdfnr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))] + + # ADD RESULTS TO INDIRECT LOG + valid_error_log = pd.concat([valid_error_log, rows_vdfnr]) + + for _, r in rows_vdfnr.iterrows(): + if 'UBERON' in r['s'] and 'UBERON' in r['o']: + indirect_as.add((r['s'], r['o'])) + elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']): + indirect_ct.add((r['s'], r['o'])) # AS-CT HAS PART valid_has_part, terms_ct_as = ug.verify_relationship(terms_ct_as, ug.select_has_part) diff --git a/src/uberongraph_tools.py b/src/uberongraph_tools.py index 7507ddb0a..1562efab2 100644 --- a/src/uberongraph_tools.py +++ b/src/uberongraph_tools.py @@ -71,7 +71,6 @@ def __init__(self): PREFIX CL: PREFIX PCL: SELECT ?subject ?object - FROM FROM { VALUES (?subject ?object) { @@ -79,6 +78,20 @@ def __init__(self): } ?subject connected_to: ?object . }""" + + self.select_ct_nonredundant = """ + PREFIX connected_to: + PREFIX UBERON: + PREFIX CL: + PREFIX PCL: + SELECT ?subject ?object + FROM + { + VALUES (?subject ?object) { + %s + } + ?subject connected_to: ?object . + }""" self.select_label = """ PREFIX rdfs: @@ -110,6 +123,22 @@ def __init__(self): ?subject develops_from: ?object . } """ + + self.select_dev_from_nonredundant = """ + PREFIX develops_from: + PREFIX owl: + PREFIX UBERON: + PREFIX CL: + PREFIX PCL: + SELECT ?subject ?object + FROM + { + VALUES (?subject ?object) { + %s + } + ?subject develops_from: ?object . + } + """ self.select_po_nonredundant = """ PREFIX part_of: From cf54d0390c66c5fe8d1f8b474918e49cd1729195 Mon Sep 17 00:00:00 2001 From: Anita Caron Date: Thu, 21 Sep 2023 11:28:18 +0100 Subject: [PATCH 2/4] fix indirect logs --- src/template_generation_tools.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/template_generation_tools.py b/src/template_generation_tools.py index 38eb4f9b4..274cf98f8 100644 --- a/src/template_generation_tools.py +++ b/src/template_generation_tools.py @@ -178,7 +178,7 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict): # INDIRECT SUBCLASS CHECK valid_subclass_onto, _ = ug.verify_relationship(transform_to_str(valid_subclass), ug.select_subclass_ontology) - terms_s, terms_o = split_terms(transform_to_str(valid_subclass - valid_subclass_onto)) + terms_s, terms_o = split_terms(transform_to_str(valid_subclass_onto)) rows_nvso = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))] @@ -202,7 +202,7 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict): valid_po_nr, _ = ug.verify_relationship(terms_valid_po, ug.select_po_nonredundant) - terms_s, terms_o = split_terms(transform_to_str(valid_po - valid_po_nr)) + terms_s, terms_o = split_terms(transform_to_str(valid_po_nr)) rows_nvponr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))] @@ -224,7 +224,7 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict): # INDIRECT OVERLAPS CHECK valid_o_nr, _ = ug.verify_relationship(transform_to_str(valid_overlaps), ug.select_overlaps_nonredundant) - terms_s, terms_o = split_terms(transform_to_str(valid_overlaps - valid_o_nr)) + terms_s, terms_o = split_terms(transform_to_str(valid_o_nr)) rows_nvonr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))] @@ -252,7 +252,7 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict): # INDIRECT CONNECTED TO CHECK valid_conn_to_nr, _ = ug.verify_relationship(transform_to_str(valid_conn_to), ug.select_ct_nonredundant) - terms_s, terms_o = split_terms(transform_to_str(valid_conn_to - valid_conn_to_nr)) + terms_s, terms_o = split_terms(transform_to_str(valid_conn_to_nr)) rows_vctnr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))] @@ -284,7 +284,7 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict): # INDIRECT DEVELOPS FROM CHECK valid_dev_nr, _ = ug.verify_relationship(transform_to_str(valid_dev_from), ug.select_dev_from_nonredundant) print(valid_dev_from, valid_dev_nr) - terms_s, terms_o = split_terms(transform_to_str(valid_dev_from - valid_dev_nr)) + terms_s, terms_o = split_terms(transform_to_str(valid_dev_nr)) rows_vdfnr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))] From becb967f09c194791a88c67c9b54d8e33b9fb276 Mon Sep 17 00:00:00 2001 From: Anita Caron Date: Thu, 21 Sep 2023 11:49:04 +0100 Subject: [PATCH 3/4] add indirect check to `located in` and `continuous with` --- src/template_generation_tools.py | 32 +++++++++++++++++++++++++++++++ src/uberongraph_tools.py | 33 +++++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 1 deletion(-) diff --git a/src/template_generation_tools.py b/src/template_generation_tools.py index 274cf98f8..ed5bcb15e 100644 --- a/src/template_generation_tools.py +++ b/src/template_generation_tools.py @@ -242,6 +242,22 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict): records, valid_as, valid_ct = add_rows(records, valid_as, valid_ct, valid_ct_as_locatedin, 'located_in') terms_ct_as = terms_ct_as - transform_to_str(valid_ct_as_locatedin) + + # INDIRECT LOCATED IN CHECK + valid_loc_in_nr, _ = ug.verify_relationship(transform_to_str(valid_ct_as_locatedin), ug.select_li_nonredundant) + + terms_s, terms_o = split_terms(transform_to_str(valid_loc_in_nr)) + + rows_vlinr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))] + + # ADD RESULTS TO INDIRECT LOG + valid_error_log = pd.concat([valid_error_log, rows_vlinr]) + + for _, r in rows_vlinr.iterrows(): + if 'UBERON' in r['s'] and 'UBERON' in r['o']: + indirect_as.add((r['s'], r['o'])) + elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']): + indirect_ct.add((r['s'], r['o'])) # CONNECTED TO CHECK valid_conn_to, terms_pairs = ug.verify_relationship(terms_pairs, ug.select_ct) @@ -269,6 +285,22 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict): valid_cont_with, terms_pairs = ug.verify_relationship(terms_pairs, ug.select_continuous_with) records, valid_as, valid_ct = add_rows(records, valid_as, valid_ct, valid_cont_with, 'continuous_with') + + # INDIRECT CONTINUOUS WITH CHECK + valid_cont_with_nr, _ = ug.verify_relationship(transform_to_str(valid_cont_with), ug.select_cw_nonredundant) + + terms_s, terms_o = split_terms(transform_to_str(valid_cont_with_nr)) + + rows_vctnr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))] + + # ADD RESULTS TO INDIRECT LOG + valid_error_log = pd.concat([valid_error_log, rows_vctnr]) + + for _, r in rows_vctnr.iterrows(): + if 'UBERON' in r['s'] and 'UBERON' in r['o']: + indirect_as.add((r['s'], r['o'])) + elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']): + indirect_ct.add((r['s'], r['o'])) # STRICT CT-AS REPORT terms_s, terms_o = split_terms(terms_ct_as) diff --git a/src/uberongraph_tools.py b/src/uberongraph_tools.py index 1562efab2..b79d24ad1 100644 --- a/src/uberongraph_tools.py +++ b/src/uberongraph_tools.py @@ -263,7 +263,24 @@ def __init__(self): } ?subject located_in: ?object . } - """ + """ + + self.select_li_nonredundant = """ + PREFIX rdfs: + PREFIX UBERON: + PREFIX CL: + PREFIX PCL: + PREFIX located_in: + SELECT ?subject ?object + FROM + { + VALUES (?subject ?object) { + %s + } + ?subject located_in: ?object . + } + """ + self.select_normalized_ic = """ PREFIX normalizedIC: PREFIX UBERON: @@ -292,6 +309,20 @@ def __init__(self): ?subject continuous_with: ?object . } """ + self.select_cw_nonredundant = """ + PREFIX continuous_with: + PREFIX UBERON: + PREFIX CL: + + SELECT ?subject ?object + FROM + { + VALUES (?subject ?object) { + %s + } + ?subject continuous_with: ?object . + } + """ def ask_uberon(self, r, q, urls=True): """""" From ebed33295050a1e62d0c1c1ba6edb2c57dca9223 Mon Sep 17 00:00:00 2001 From: Anita Caron Date: Wed, 27 Sep 2023 10:23:05 +0100 Subject: [PATCH 4/4] refact indirect reports generation --- src/ccf_tools.py | 10 ++++++++++ src/template_generation_tools.py | 22 ++++++++++++---------- src/uberongraph_tools.py | 15 +++++++++++++-- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/ccf_tools.py b/src/ccf_tools.py index 02b486bcf..72bb91074 100644 --- a/src/ccf_tools.py +++ b/src/ccf_tools.py @@ -280,3 +280,13 @@ def add_rows(records, valid_as, valid_ct, pairs, relation, inverse=False): elif 'CL' in s and 'CL' in o: valid_ct.add((s,o)) return records, valid_as, valid_ct + +def add_indirect_nb(valid_error_log, indirect_as, indirect_ct, report): + valid_error_log = pd.concat([valid_error_log, report]) + for _, r in report.iterrows(): + if 'UBERON' in r['s'] and 'UBERON' in r['o']: + indirect_as.add((r['s'], r['o'])) + elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']): + indirect_ct.add((r['s'], r['o'])) + + return valid_error_log, indirect_as, indirect_ct \ No newline at end of file diff --git a/src/template_generation_tools.py b/src/template_generation_tools.py index ed5bcb15e..5aa4c6923 100644 --- a/src/template_generation_tools.py +++ b/src/template_generation_tools.py @@ -1,7 +1,7 @@ import pandas as pd from rdflib.graph import ConjunctiveGraph from uberongraph_tools import UberonGraph -from ccf_tools import chunks, split_terms, transform_to_str, add_rows +from ccf_tools import chunks, split_terms, transform_to_str, add_rows, add_indirect_nb import logging # logger = logging.getLogger('ASCT-b Tables Log') @@ -176,20 +176,22 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict): records, valid_as, valid_ct = add_rows(records, valid_as, valid_ct, valid_subclass.union(valid_ct_as_subclass), 'isa') # INDIRECT SUBCLASS CHECK - valid_subclass_onto, _ = ug.verify_relationship(transform_to_str(valid_subclass), ug.select_subclass_ontology) + terms_s, terms_o = ug.check_indirect_rel(valid_subclass, ug.select_subclass_ontology) +# valid_subclass_onto, _ = ug.verify_relationship(transform_to_str(valid_subclass), ug.select_subclass_ontology) - terms_s, terms_o = split_terms(transform_to_str(valid_subclass_onto)) +# terms_s, terms_o = split_terms(transform_to_str(valid_subclass_onto)) rows_nvso = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))] # ADD RESULTS TO INDIRECT LOG - valid_error_log = pd.concat([valid_error_log, rows_nvso]) - - for _, r in rows_nvso.iterrows(): - if 'UBERON' in r['s'] and 'UBERON' in r['o']: - indirect_as.add((r['s'], r['o'])) - elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']): - indirect_ct.add((r['s'], r['o'])) + valid_error_log, indirect_as, indirect_ct = add_indirect_nb(valid_error_log, indirect_as, indirect_ct, rows_nvso) +# valid_error_log = pd.concat([valid_error_log, rows_nvso]) + +# for _, r in rows_nvso.iterrows(): +# if 'UBERON' in r['s'] and 'UBERON' in r['o']: +# indirect_as.add((r['s'], r['o'])) +# elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']): +# indirect_ct.add((r['s'], r['o'])) # PART OF CHECK valid_po, terms_pairs = ug.verify_relationship(terms_pairs, ug.select_po) diff --git a/src/uberongraph_tools.py b/src/uberongraph_tools.py index b79d24ad1..b20d9c727 100644 --- a/src/uberongraph_tools.py +++ b/src/uberongraph_tools.py @@ -1,6 +1,6 @@ from SPARQLWrapper import SPARQLWrapper, JSON, RDFXML from rdflib.graph import ConjunctiveGraph -from ccf_tools import chunks, transform_to_str +from ccf_tools import chunks, transform_to_str, split_terms class UberonGraph(): def __init__(self): @@ -518,4 +518,15 @@ def get_annotations(self, terms): terms = "\n".join(terms) annotations = self.construct_annotation(terms) - return annotations \ No newline at end of file + return annotations + + def check_indirect_rel(self, valid_rel, query): + valid_indirect, _ = self.verify_relationship(transform_to_str(valid_rel), query) + + terms_s, terms_o = split_terms(transform_to_str(valid_indirect)) + + return terms_s, terms_o + + + + \ No newline at end of file