Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix indirect logs #271

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/ccf_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,3 +280,13 @@ def add_rows(records, valid_as, valid_ct, pairs, relation, inverse=False):
elif 'CL' in s and 'CL' in o:
valid_ct.add((s,o))
return records, valid_as, valid_ct

def add_indirect_nb(valid_error_log, indirect_as, indirect_ct, report):
    """Append ``report`` to the indirect log and record its term pairs.

    Every row of ``report`` is appended to ``valid_error_log``. Each
    ``(s, o)`` pair of ``report`` is then added to ``indirect_as`` when both
    terms contain ``'UBERON'``, or otherwise to ``indirect_ct`` when both
    terms contain ``'CL'`` (which also matches ``'PCL'``). Pairs matching
    neither rule are only logged.

    Args:
        valid_error_log: DataFrame of rows logged so far.
        indirect_as: Collection of ``(s, o)`` tuples, updated in place via
            ``add``.
        indirect_ct: Collection of ``(s, o)`` tuples, updated in place via
            ``add``.
        report: DataFrame with ``s`` and ``o`` columns holding the rows to
            log (values are presumably term-ID strings; verify against the
            caller).

    Returns:
        ``(valid_error_log, indirect_as, indirect_ct)``: the concatenated
        log, followed by the same two collections that were passed in.
    """
    valid_error_log = pd.concat([valid_error_log, report])

    # Read the two columns directly instead of building a Series per row
    # with iterrows(); only 's' and 'o' are needed.
    for s, o in zip(report['s'], report['o']):
        if 'UBERON' in s and 'UBERON' in o:
            indirect_as.add((s, o))
        # 'CL' is a substring of 'PCL', so one test covers both prefixes.
        elif 'CL' in s and 'CL' in o:
            indirect_ct.add((s, o))

    return valid_error_log, indirect_as, indirect_ct
88 changes: 77 additions & 11 deletions src/template_generation_tools.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd
from rdflib.graph import ConjunctiveGraph
from uberongraph_tools import UberonGraph
from ccf_tools import chunks, split_terms, transform_to_str, add_rows
from ccf_tools import chunks, split_terms, transform_to_str, add_rows, add_indirect_nb
import logging

# logger = logging.getLogger('ASCT-b Tables Log')
Expand Down Expand Up @@ -176,20 +176,22 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict):
records, valid_as, valid_ct = add_rows(records, valid_as, valid_ct, valid_subclass.union(valid_ct_as_subclass), 'isa')

# INDIRECT SUBCLASS CHECK
valid_subclass_onto, _ = ug.verify_relationship(transform_to_str(valid_subclass), ug.select_subclass_ontology)
terms_s, terms_o = ug.check_indirect_rel(valid_subclass, ug.select_subclass_ontology)
# valid_subclass_onto, _ = ug.verify_relationship(transform_to_str(valid_subclass), ug.select_subclass_ontology)

terms_s, terms_o = split_terms(transform_to_str(valid_subclass - valid_subclass_onto))
# terms_s, terms_o = split_terms(transform_to_str(valid_subclass_onto))

rows_nvso = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))]

# ADD RESULTS TO INDIRECT LOG
valid_error_log = pd.concat([valid_error_log, rows_nvso])
valid_error_log, indirect_as, indirect_ct = add_indirect_nb(valid_error_log, indirect_as, indirect_ct, rows_nvso)
# valid_error_log = pd.concat([valid_error_log, rows_nvso])

for _, r in rows_nvso.iterrows():
if 'UBERON' in r['s'] and 'UBERON' in r['o']:
indirect_as.add((r['s'], r['o']))
elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']):
indirect_ct.add((r['s'], r['o']))
# for _, r in rows_nvso.iterrows():
# if 'UBERON' in r['s'] and 'UBERON' in r['o']:
# indirect_as.add((r['s'], r['o']))
# elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']):
# indirect_ct.add((r['s'], r['o']))

# PART OF CHECK
valid_po, terms_pairs = ug.verify_relationship(terms_pairs, ug.select_po)
Expand All @@ -202,7 +204,7 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict):

valid_po_nr, _ = ug.verify_relationship(terms_valid_po, ug.select_po_nonredundant)

terms_s, terms_o = split_terms(transform_to_str(valid_po - valid_po_nr))
terms_s, terms_o = split_terms(transform_to_str(valid_po_nr))

rows_nvponr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))]

Expand All @@ -224,7 +226,7 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict):
# INDIRECT OVERLAPS CHECK
valid_o_nr, _ = ug.verify_relationship(transform_to_str(valid_overlaps), ug.select_overlaps_nonredundant)

terms_s, terms_o = split_terms(transform_to_str(valid_overlaps - valid_o_nr))
terms_s, terms_o = split_terms(transform_to_str(valid_o_nr))

rows_nvonr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))]

Expand All @@ -242,17 +244,65 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict):
records, valid_as, valid_ct = add_rows(records, valid_as, valid_ct, valid_ct_as_locatedin, 'located_in')

terms_ct_as = terms_ct_as - transform_to_str(valid_ct_as_locatedin)

# INDIRECT LOCATED IN CHECK
valid_loc_in_nr, _ = ug.verify_relationship(transform_to_str(valid_ct_as_locatedin), ug.select_li_nonredundant)

terms_s, terms_o = split_terms(transform_to_str(valid_loc_in_nr))

rows_vlinr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))]

# ADD RESULTS TO INDIRECT LOG
valid_error_log = pd.concat([valid_error_log, rows_vlinr])

for _, r in rows_vlinr.iterrows():
if 'UBERON' in r['s'] and 'UBERON' in r['o']:
indirect_as.add((r['s'], r['o']))
elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']):
indirect_ct.add((r['s'], r['o']))

# CONNECTED TO CHECK
valid_conn_to, terms_pairs = ug.verify_relationship(terms_pairs, ug.select_ct)
valid_ct_as_conn_to, terms_ct_as = ug.verify_relationship(terms_ct_as, ug.select_ct)

records, valid_as, valid_ct = add_rows(records, valid_as, valid_ct, valid_conn_to.union(valid_ct_as_conn_to), 'connected_to')

# INDIRECT CONNECTED TO CHECK
valid_conn_to_nr, _ = ug.verify_relationship(transform_to_str(valid_conn_to), ug.select_ct_nonredundant)

terms_s, terms_o = split_terms(transform_to_str(valid_conn_to_nr))

rows_vctnr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))]

# ADD RESULTS TO INDIRECT LOG
valid_error_log = pd.concat([valid_error_log, rows_vctnr])

for _, r in rows_vctnr.iterrows():
if 'UBERON' in r['s'] and 'UBERON' in r['o']:
indirect_as.add((r['s'], r['o']))
elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']):
indirect_ct.add((r['s'], r['o']))

# CONTINUOUS WITH CHECK
valid_cont_with, terms_pairs = ug.verify_relationship(terms_pairs, ug.select_continuous_with)

records, valid_as, valid_ct = add_rows(records, valid_as, valid_ct, valid_cont_with, 'continuous_with')

# INDIRECT CONTINUOUS WITH CHECK
valid_cont_with_nr, _ = ug.verify_relationship(transform_to_str(valid_cont_with), ug.select_cw_nonredundant)

terms_s, terms_o = split_terms(transform_to_str(valid_cont_with_nr))

rows_vctnr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))]

# ADD RESULTS TO INDIRECT LOG
valid_error_log = pd.concat([valid_error_log, rows_vctnr])

for _, r in rows_vctnr.iterrows():
if 'UBERON' in r['s'] and 'UBERON' in r['o']:
indirect_as.add((r['s'], r['o']))
elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']):
indirect_ct.add((r['s'], r['o']))

# STRICT CT-AS REPORT
terms_s, terms_o = split_terms(terms_ct_as)
Expand All @@ -264,6 +314,22 @@ def generate_class_graph_template(ccf_tools_df :pd.DataFrame, log_dict: dict):
# DEVELOPS FROM CHECK
valid_dev_from, terms_pairs = ug.verify_relationship(terms_pairs, ug.select_develops_from)
records, valid_as, valid_ct = add_rows(records, valid_as, valid_ct, valid_dev_from, 'develops_from')

# INDIRECT DEVELOPS FROM CHECK
valid_dev_nr, _ = ug.verify_relationship(transform_to_str(valid_dev_from), ug.select_dev_from_nonredundant)
print(valid_dev_from, valid_dev_nr)
terms_s, terms_o = split_terms(transform_to_str(valid_dev_nr))

rows_vdfnr = ccf_tools_df[ccf_tools_df[["s","o"]].apply(tuple, 1).isin(zip(terms_s, terms_o))]

# ADD RESULTS TO INDIRECT LOG
valid_error_log = pd.concat([valid_error_log, rows_vdfnr])

for _, r in rows_vdfnr.iterrows():
if 'UBERON' in r['s'] and 'UBERON' in r['o']:
indirect_as.add((r['s'], r['o']))
elif ('CL' in r['s'] or 'PCL' in r['s']) and ('CL' in r['o'] or 'PCL' in r['o']):
indirect_ct.add((r['s'], r['o']))

# AS-CT HAS PART
valid_has_part, terms_ct_as = ug.verify_relationship(terms_ct_as, ug.select_has_part)
Expand Down
79 changes: 75 additions & 4 deletions src/uberongraph_tools.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from SPARQLWrapper import SPARQLWrapper, JSON, RDFXML
from rdflib.graph import ConjunctiveGraph
from ccf_tools import chunks, transform_to_str
from ccf_tools import chunks, transform_to_str, split_terms

class UberonGraph():
def __init__(self):
Expand Down Expand Up @@ -71,14 +71,27 @@ def __init__(self):
PREFIX CL: <http://purl.obolibrary.org/obo/CL_>
PREFIX PCL: <http://purl.obolibrary.org/obo/PCL_>
SELECT ?subject ?object
FROM <http://reasoner.renci.org/ontology>
FROM <http://reasoner.renci.org/redundant>
{
VALUES (?subject ?object) {
%s
}
?subject connected_to: ?object .
}"""

self.select_ct_nonredundant = """
PREFIX connected_to: <http://purl.obolibrary.org/obo/RO_0002170>
PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
PREFIX CL: <http://purl.obolibrary.org/obo/CL_>
PREFIX PCL: <http://purl.obolibrary.org/obo/PCL_>
SELECT ?subject ?object
FROM <http://reasoner.renci.org/nonredundant>
{
VALUES (?subject ?object) {
%s
}
?subject connected_to: ?object .
}"""

self.select_label = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
Expand Down Expand Up @@ -110,6 +123,22 @@ def __init__(self):
?subject develops_from: ?object .
}
"""

self.select_dev_from_nonredundant = """
PREFIX develops_from: <http://purl.obolibrary.org/obo/RO_0002202>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
PREFIX CL: <http://purl.obolibrary.org/obo/CL_>
PREFIX PCL: <http://purl.obolibrary.org/obo/PCL_>
SELECT ?subject ?object
FROM <http://reasoner.renci.org/nonredundant>
{
VALUES (?subject ?object) {
%s
}
?subject develops_from: ?object .
}
"""

self.select_po_nonredundant = """
PREFIX part_of: <http://purl.obolibrary.org/obo/BFO_0000050>
Expand Down Expand Up @@ -234,7 +263,24 @@ def __init__(self):
}
?subject located_in: ?object .
}
"""
"""

self.select_li_nonredundant = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
PREFIX CL: <http://purl.obolibrary.org/obo/CL_>
PREFIX PCL: <http://purl.obolibrary.org/obo/PCL_>
PREFIX located_in: <http://purl.obolibrary.org/obo/RO_0001025>
SELECT ?subject ?object
FROM <http://reasoner.renci.org/nonredundant>
{
VALUES (?subject ?object) {
%s
}
?subject located_in: ?object .
}
"""

self.select_normalized_ic = """
PREFIX normalizedIC: <http://reasoner.renci.org/vocab/normalizedInformationContent>
PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
Expand Down Expand Up @@ -263,6 +309,20 @@ def __init__(self):
?subject continuous_with: ?object .
}
"""
self.select_cw_nonredundant = """
PREFIX continuous_with: <http://purl.obolibrary.org/obo/RO_0002150>
PREFIX UBERON: <http://purl.obolibrary.org/obo/UBERON_>
PREFIX CL: <http://purl.obolibrary.org/obo/CL_>

SELECT ?subject ?object
FROM <http://reasoner.renci.org/nonredundant>
{
VALUES (?subject ?object) {
%s
}
?subject continuous_with: ?object .
}
"""

def ask_uberon(self, r, q, urls=True):
""""""
Expand Down Expand Up @@ -458,4 +518,15 @@ def get_annotations(self, terms):
terms = "\n".join(terms)
annotations = self.construct_annotation(terms)

return annotations
return annotations

def check_indirect_rel(self, valid_rel, query):
    """Return the subject and object terms of ``valid_rel`` confirmed by ``query``.

    ``valid_rel`` is converted with ``transform_to_str`` and passed to
    ``self.verify_relationship`` together with ``query``. The first element
    of that result is converted again and split into two term collections
    with ``split_terms``.

    Args:
        valid_rel: Relationships to re-check (presumably a set of
            subject/object pairs; verify against the caller).
        query: Query template handed to ``verify_relationship``.

    Returns:
        A ``(terms_s, terms_o)`` tuple as produced by ``split_terms``.
    """
    candidate_pairs = transform_to_str(valid_rel)
    # Only the first element of the result is used; the second is discarded.
    confirmed, _unused = self.verify_relationship(candidate_pairs, query)

    confirmed_pairs = transform_to_str(confirmed)
    subjects, objects = split_terms(confirmed_pairs)

    return subjects, objects