From e16d54fe65eaef02f6fa717e4814e07996065c83 Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Tue, 5 Dec 2023 13:42:16 -0500 Subject: [PATCH 1/4] Add link to recipe template --- src/ontogpt/templates/recipe.py | 33 ++++++++++++++++++++----------- src/ontogpt/templates/recipe.yaml | 4 ++++ 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/ontogpt/templates/recipe.py b/src/ontogpt/templates/recipe.py index 6fb0d97b4..6b98d9a93 100644 --- a/src/ontogpt/templates/recipe.py +++ b/src/ontogpt/templates/recipe.py @@ -2,7 +2,7 @@ from datetime import datetime, date from enum import Enum from typing import List, Dict, Optional, Any, Union -from pydantic import BaseModel as BaseModel, Field +from pydantic import BaseModel as BaseModel, ConfigDict, Field import sys if sys.version_info >= (3, 8): from typing import Literal @@ -13,13 +13,13 @@ metamodel_version = "None" version = "None" -class ConfiguredBaseModel(BaseModel, - validate_assignment = True, - validate_default = True, - extra = 'forbid', - arbitrary_types_allowed = True, - use_enum_values = True): - pass +class ConfiguredBaseModel(BaseModel): + model_config = ConfigDict( + validate_assignment=True, + validate_default=True, + extra='forbid', + arbitrary_types_allowed=True, + use_enum_values = True) class NullDataOptions(str, Enum): @@ -35,7 +35,7 @@ class NullDataOptions(str, Enum): class Recipe(ConfiguredBaseModel): - url: Optional[str] = Field(None) + url: str = Field(...) label: Optional[str] = Field(None, description="""the name of the recipe""") description: Optional[str] = Field(None, description="""a brief textual description of the recipe""") categories: Optional[List[str]] = Field(default_factory=list, description="""a semicolon separated list of the categories to which this recipe belongs""") @@ -136,11 +136,21 @@ class Triple(CompoundExpression): class TextWithTriples(ConfiguredBaseModel): - + """ + A text containing one or more relations of the Triple type. 
+ """ publication: Optional[Publication] = Field(None) triples: Optional[List[Triple]] = Field(default_factory=list) +class TextWithEntity(ConfiguredBaseModel): + """ + A text containing one or more instances of a single type of entity. + """ + publication: Optional[Publication] = Field(None) + entities: Optional[List[str]] = Field(default_factory=list) + + class RelationshipType(NamedEntity): id: str = Field(..., description="""A unique identifier for the named entity""") @@ -181,7 +191,8 @@ class AnnotatorResult(ConfiguredBaseModel): FoodItem.model_rebuild() Triple.model_rebuild() TextWithTriples.model_rebuild() +TextWithEntity.model_rebuild() RelationshipType.model_rebuild() Publication.model_rebuild() AnnotatorResult.model_rebuild() - + diff --git a/src/ontogpt/templates/recipe.yaml b/src/ontogpt/templates/recipe.yaml index be25fc805..073179cc0 100644 --- a/src/ontogpt/templates/recipe.yaml +++ b/src/ontogpt/templates/recipe.yaml @@ -17,6 +17,10 @@ prefixes: qudt: http://qudt.org/schema/qudt/ dbpediaont: http://dbpedia.org/ontology/ +# This template incorporates syntax from +# linkml-owl to define OWL interpretations +# and enable advanced functionality. +# https://linkml.io/linkml-owl/templates/ default_prefix: recipe default_range: string From a4235979581607a1c1fa61f9dbdf28f7827a3150 Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Tue, 5 Dec 2023 13:44:40 -0500 Subject: [PATCH 2/4] Formatting fixes --- docs/custom.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/custom.md b/docs/custom.md index 5adec41e9..87a82cfef 100644 --- a/docs/custom.md +++ b/docs/custom.md @@ -281,7 +281,7 @@ Ex.: ontogpt extract -t mendelian_disease.MendelianDisease -i marfan-wikipedia.txt ``` -OntoGPT-specific extensions are specified as _annotations_. +OntoGPT-specific extensions are specified as *annotations*. You can specify a set of annotators for a field using the `annotators` annotation. @@ -353,7 +353,7 @@ Ex. 
the `gocam` schema has an attribute: range: GeneMolecularActivityRelationship ``` -The range `GeneMolecularActivityRelationship` has been specified _inline_, so it will nest. +The range `GeneMolecularActivityRelationship` has been specified *inline*, so it will nest. The generated prompt is: @@ -374,7 +374,7 @@ If you have installed OntoGPT directly from its GitHub repository, then you may 1. Move the schema file to the `src/ontogpt/templates` directory. 2. Run `make` from the root of the repository to generate Pydantic versions of the schema. -If you have installed OntoGPT from `pip`, _or_ if you can't use the `make` command, the process is similar, though it will depend on where the package is installed. +If you have installed OntoGPT from `pip`, *or* if you can't use the `make` command, the process is similar, though it will depend on where the package is installed. 1. Use the LinkML `gen-pydantic` tool to generate Pydantic classes. If your schema is named `alfred.yaml`, then run the following: From 9c6b2b5c59ca927e7da1c077b0ea72048157e16b Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Tue, 5 Dec 2023 14:34:26 -0500 Subject: [PATCH 3/4] Minor template fixes --- src/ontogpt/templates/recipe.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ontogpt/templates/recipe.yaml b/src/ontogpt/templates/recipe.yaml index 073179cc0..6f1c55165 100644 --- a/src/ontogpt/templates/recipe.yaml +++ b/src/ontogpt/templates/recipe.yaml @@ -33,7 +33,7 @@ classes: Recipe: tree_root: true close_mappings: - - FOODON:00004081 + - FOODON:00004081 # food recipe attributes: url: identifier: true @@ -53,7 +53,7 @@ classes: owl: AnnotationProperty, AnnotationAssertion categories: description: a semicolon separated list of the categories to which this recipe belongs - range: RecipeCategory + range: RecipeCategory multivalued: true slot_uri: dcterms:subject annotations: @@ -172,7 +172,7 @@ classes: state: description: the state of the food item (e.g. 
chopped, diced) annotations: - owl: DataProperty, DataHasValue + owl: DataProperty, DataHasValue FoodType: is_a: NamedEntity From 3cd076573fd357316725af752e51652e11bdbf44 Mon Sep 17 00:00:00 2001 From: caufieldjh Date: Tue, 5 Dec 2023 14:50:46 -0500 Subject: [PATCH 4/4] Expand docs section on OWL annotations and templates --- docs/custom.md | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) diff --git a/docs/custom.md b/docs/custom.md index 87a82cfef..648c5b6f7 100644 --- a/docs/custom.md +++ b/docs/custom.md @@ -367,6 +367,96 @@ The output of this is then passed through further SPIRES iterations. LLMs have context sizes limiting the combined length of their inputs and outputs. The `gpt-3.5-turbo` model, for example, has a 4,096 token limit (prompt + completion), while the `gpt-3.5-turbo-16k` model has a larger context of 16,384 tokens. +## Advanced functionality with linkml-owl + +A LinkML schema used in OntoGPT may include annotations describing how each component relates to OWL syntax. + +This level of detail may be necessary if your data model includes complex logic beyond simple hierarchical relationships. + +For example, if you are extracting details of chemical reactions, it may be necessary to keep track of details like stoichiometry or charge. [See a relevant example here](https://linkml.io/linkml-owl/templates/). + +Incorporating OWL annotations into the custom schema (remember to export using the `-O owl` option) also supports importing the results into an ontology editor like Protégé, at which point they may be reasoned over. 
+ +The `recipe` template in OntoGPT incorporates several OWL annotations: + +```yaml +classes: + Recipe: + tree_root: true + close_mappings: + - FOODON:00004081 + attributes: + url: + identifier: true + range: uriorcurie + slot_uri: rdf:Resource + annotations: + prompt.skip: true + label: + description: the name of the recipe + slot_uri: rdfs:label + annotations: + owl: AnnotationProperty, AnnotationAssertion + description: + description: a brief textual description of the recipe + slot_uri: dcterms:description + annotations: + owl: AnnotationProperty, AnnotationAssertion + categories: + description: a semicolon separated list of the categories to which this recipe belongs + range: RecipeCategory + multivalued: true + slot_uri: dcterms:subject + annotations: + owl: AnnotationAssertion + ingredients: + description: a semicolon separated list of the ingredients plus quantities of the recipe + multivalued: true + range: Ingredient + slot_uri: FOODON:00002420 + annotations: + owl: ObjectProperty, ObjectSomeValuesFrom + steps: + description: a semicolon separated list of the individual steps involved in this recipe + multivalued: true + range: Step + annotations: + owl: ObjectProperty, ObjectSomeValuesFrom + annotations: + owl: Class + owl.template: | + EquivalentClasses( + {{url}} + ObjectIntersectionOf( + recipe:Recipe + + {% for step in steps %} + ObjectSomeValuesFrom( + recipe:steps + {{tr(step)}} + ) + {% endfor %} + {% for ingredient in ingredients %} + ObjectSomeValuesFrom( + FOODON:00002420 + {{tr(ingredient)}} + ) + {% endfor %} + ) + ) +... +``` + +Several of the slots above, like `close_mappings` and `slot_uri`, aren't exclusive to OWL but define the parts of this data model in terms of existing vocabularies, so the schema and any extracted results will be more compatible with other models and methods. Here, `close_mappings` is used to show that the `Recipe` class is close but not necessarily identical to `FOODON:00004081`, or "food recipe". 
+ +The `owl` slot under `annotations` for these attributes defines one or more corresponding OWL axiom types. Because the `label` attribute may be `AnnotationProperty, AnnotationAssertion` in OWL ([see the OWL2 syntax on Annotations](https://www.w3.org/TR/owl2-syntax/#Annotations)), we know it can be applied as a property for something else, like an axiom or a specific entity with an IRI. This is how the `rdfs:label` annotation property usually works, so this isn't surprising. + +The `owl.template` slot defines template logic relating a Recipe to its component objects: steps and ingredients. It begins with an `EquivalentClasses` axiom to define the identifier of the recipe, which we assume to be a URL, as identical to the class expression in subsequent lines. Specifically, the class expression states that the `Recipe` must include both a series of steps (e.g., "fry", "chop") and a series of ingredients. The ingredients relate to the recipe through the property `FOODON:00002420`, or "has ingredient". + +Note that everything in {curly brackets} is a template of some kind. The Jinja template system is used in the example in lines like `{% for step in steps %}` where a loop is used. Template slots like `{{url}}` may be accessed directly with their names. So what makes `{{tr(step)}}` different from `{{step}}`? The `tr()` function used here translates its input into an OWL entity so it may be used to generate valid OWL axioms. + +See also: the [documentation page on OWL exports](owl_exports.md) and the [linkml-owl documentation](https://linkml.io/linkml-owl/). + ## Install a custom schema If you have installed OntoGPT directly from its GitHub repository, then you may install a custom schema like this: