diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 8f047bd2..7df0342a 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -1,58 +1,67 @@ -name: Build +# Built from: +# https://docs.github.com/en/actions/guides/building-and-testing-python +# https://github.com/snok/install-poetry#workflows-and-tips -on: - push: - branches: [ main ] +name: Build and test linkml-runtime + +on: [pull_request] jobs: - unittests-n-commits: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.9", "3.10"] steps: - - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - - name: Update requirements files and test - run: | - python -m pip install --upgrade pip - pip install pipenv-to-requirements - pipenv_to_requirements - pip install -r requirements.txt - pip install -r requirements-dev.txt - - - name: Run make file - run: | - make - - - name: Run unit tests over output - run: | - pip install pipenv - pipenv install --dev - pipenv run python -m unittest discover -p 'test_*.py' - - - name: Check in changes to requirements files - run: | - git add requirements*.txt - if [[ ! -z $(git status -s requirements*.txt) ]] - then - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - git commit -m 'Automatically generated requirements.txt and requirements-dev.txt' requirements*.txt - git push - fi - - - name: Check updated content - run: | - find linkml_model -name __pycache__ -type d -prune -exec rm -rf '{}' '+' - git add --force docs linkml_model - if [[ ! -z $(git status -s .) ]] - then - git config --local user.email "action@github.com" - git config --local user.name "GitHub Action" - git commit -m 'Updating generated outputs' . 
- git push - fi + + #---------------------------------------------- + # check-out repo and set-up python + #---------------------------------------------- + - name: Check out repository + uses: actions/checkout@v2 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + + #---------------------------------------------- + # install & configure poetry + #---------------------------------------------- + - name: Install Poetry + uses: snok/install-poetry@v1.3 + with: + virtualenvs-create: true + virtualenvs-in-project: true + + #---------------------------------------------- + # load cached venv if cache exists + #---------------------------------------------- + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v2 + with: + path: .venv + key: venv-${{ runner.os }}-${{ hashFiles('**/poetry.lock') }} + + #---------------------------------------------- + # install dependencies if cache does not exist + #---------------------------------------------- + - name: Install dependencies + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + run: poetry install --no-interaction --no-root + + #---------------------------------------------- + # install your root project, if required + #---------------------------------------------- + - name: Install library + run: poetry install --no-interaction + + #---------------------------------------------- + # run test suite + #---------------------------------------------- + - name: Run tests + run: make test + diff --git a/.github/workflows/pypi-publish.yaml b/.github/workflows/pypi-publish.yaml index eb47fa67..b9eb0693 100644 --- a/.github/workflows/pypi-publish.yaml +++ b/.github/workflows/pypi-publish.yaml @@ -13,19 +13,23 @@ jobs: - uses: actions/checkout@v2 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v2.2.2 with: python-version: 3.8 + - name: Install Poetry + uses: 
snok/install-poetry@v1.1.6 + with: + virtualenvs-create: true + virtualenvs-in-project: true + - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install wheel + run: poetry install --no-interaction - - name: build a binary wheel dist + - name: Build source and wheel archives run: | - rm -fr dist - python setup.py bdist_wheel sdist + poetry version $(git describe --tags --abbrev=0) + poetry build - name: Publish distribution 📦 to PyPI uses: pypa/gh-action-pypi-publish@v1.2.2 diff --git a/AUTHORS b/AUTHORS deleted file mode 100644 index 9b2bf41f..00000000 --- a/AUTHORS +++ /dev/null @@ -1,4 +0,0 @@ -Chris Mungall -GitHub Action -Harold Solbrig -hsolbrig diff --git a/ChangeLog b/ChangeLog deleted file mode 100644 index 5db13f63..00000000 --- a/ChangeLog +++ /dev/null @@ -1,131 +0,0 @@ -CHANGES -======= - -* Create inner venv before creating Pipfile -* Change permissions on build.sh and clean up Make file -* Add a simple set of unit tests -* Regenerate w/ latest linkml part 2 -* Regenerate w/ latest linkml part 2 -* Regenerate w/ latest linkml -* Update linkml-runtime dependencies -* Update linkml-runtime dependencies -* Patch to update model to include JsonObj types -* Patch to update model to include JsonObj types -* Cut the linkml cord -* Updating generated outputs -* Undo accidental change -* Switch to linkml-runtime dependency -* Switch to linkml-runtime dependency -* Updating generated outputs -* Updating generated outputs -* Automatically generated requirements.txt and requirements-dev.txt -* Update Makefile documents generation to use flat types option and emit warnings if warnings are to be had -* Add flat output for docs -* Updating generated outputs -* Update Makefile -* Further tweaks to the redirects -- seems to work this time -* Remove types subdirectory from make documents -* Add documentation to mixins -* Updating generated outputs -* Add link to actual documentation -* Updating generated outputs -* Update main.yaml -* 
Automatically generated requirements.txt and requirements-dev.txt -* Add mkdocs to dev requirements -* Create initial docs -* Fix issue in pycache removal -* Use mkdocs in docs directory -* Updating generated outputs -* Automatically generated requirements.txt and requirements-dev.txt -* Add linkml to the dev build -* Add simplified file location -* Remove linkml dependency -* Loosen linkml requirements -* Remove linkml requirement - make it manual -* Loosen up the linkml requirements -* Finally drop the biolinkml habit -* Remove extraneous placeholder -* Cut the biolinkml cord -* New artifact from build process - -v0.0.3 ------- - -* Updating generated outputs -* Lift model to the root directory. Also gen RDF -* Updating generated outputs -* Get the model itself in the distro directory -* Clear out the import map -- not being used at the moment anyway -* Updating generated outputs -* Rework rewrite rules to match new configuration -* Updating generated outputs -* Automatically generated requirements.txt and requirements-dev.txt -* Remove RDF gen until we get things sorted -* Force directory -* Force directory -* Workflow update -* Step 3 -* Step 2 of refactor -* Step one of artifact move -* Updating generated outputs -* Add the various output files to the pypi distro -* Updating generated outputs -* Remove cache -- is this getting added via build? 
-* Updating generated outputs -* Add LINKML\_NAMESPACE -* Updating generated outputs -* Automatically generated requirements.txt and requirements-dev.txt -* Add tools to get at generated files -* Add slashes at the end of directories - -v0.0.2 ------- - -* Updating generated outputs -* Automatically generated requirements.txt and requirements-dev.txt -* Further make tweaks -* Tweaks to make file -* Switch to newest biolinkml to address import issue -* Re- add URL's to imports -* Updating generated outputs -* Types go into same namespace as rest of model -* Updating generated outputs -* Remove a cache that accidentally got put -* Updating generated outputs -* Fix issue in make file -* Remove python directory and add linkml\_model to build -* Generate all contexts, including context.jsonld -* Updating generated outputs -* Currently model name must match file name in linkml -* Updating generated outputs -* Fix error in meta.yaml id and name -* Updating generated outputs -* Change the context generators to include all sources -* Updating generated outputs -* Add tools to keep from trying to commit generated files -* Updating generated outputs -* Got the mod rewrite rules working -* Updating generated outputs -* Add github action to publish on release -* Updating generated outputs -* Automatically generated requirements.txt and requirements-dev.txt -* Fix syntax on rdflib-jsonld import -* v0.0.1 drop - -v0.0.1 ------- - -* Switch to latest biolinkml so meta.py compiles -* Proposed rewrite rules sort of working -* Proposed rewrite rules sort of working -* Checkpoint -* Updating generated outputs -* Back off to python 3.8 -* More work on Makefile -* .gitignore was a bit too promiscuous -* Further tweaks to make file -* Add requirements-dev.txt as a placeholder -* master --> main -* Add json and jsonld directories -* First cut at the model -* Initial commit diff --git a/MAINTAINERS.md b/MAINTAINERS.md deleted file mode 100644 index 883ae80d..00000000 --- a/MAINTAINERS.md 
+++ /dev/null @@ -1,35 +0,0 @@ -# How to make a release - -Make an - -1) Make changes to the source schema -2) run `make all` -3) run `make tests` -4) run `make gh-deploy` - -Your documentation will be available from a URL https://my_org_or_name.github.io/my_schema/ - -Then every time you change the source schema, run: - -```bash -make all -``` - -This will generate files in: - - * [docs] - * [jsonschema] - * [shex] - * [owl] - * [rdf] - -Do **not** git add the files in docs - -Once the files are generated, run - -```bash -make gh-deploy -``` - -Your documentation will be available from a URL https://my_org_or_name.github.io/my_schema/ - diff --git a/Makefile b/Makefile deleted file mode 100644 index 08ee5431..00000000 --- a/Makefile +++ /dev/null @@ -1,268 +0,0 @@ -# All artifacts of the build should be preserved -.SECONDARY: - -# ---------------------------------------- -# Model documentation and schema directory -# ---------------------------------------- -PKG_DIR = linkml_model -SRC_DIR = $(PKG_DIR)/model -SCHEMA_DIR = $(SRC_DIR)/schema -MODEL_DOCS_DIR = $(SRC_DIR)/docs -SOURCE_FILES := $(shell find $(SCHEMA_DIR) -name '*.yaml') -SCHEMA_NAMES = $(patsubst $(SCHEMA_DIR)/%.yaml, %, $(SOURCE_FILES)) - -SCHEMA_NAME = meta -SCHEMA_SRC = $(SCHEMA_DIR)/$(SCHEMA_NAME).yaml -PKG_TGTS = graphql json jsonld jsonschema owl rdf shex sqlddl -TGTS = docs python $(PKG_TGTS) - -# Targets by PKG_TGT -PKG_T_GRAPHQL = $(PKG_DIR)/graphql -PKG_T_JSON = $(PKG_DIR)/json -PKG_T_JSONLD_CONTEXT = $(PKG_DIR)/jsonld -PKG_T_JSON_SCHEMA = $(PKG_DIR)/jsonschema -PKG_T_OWL = $(PKG_DIR)/owl -PKG_T_RDF = $(PKG_DIR)/rdf -PKG_T_SHEX = $(PKG_DIR)/shex -PKG_T_SQLDDL = $(PKG_DIR)/sqlddl -PKG_T_DOCS = $(MODEL_DOCS_DIR) -PKG_T_PYTHON = $(PKG_DIR) - -# Global generation options -GEN_OPTS = --log_level WARNING -#ENV = export PIPENV_VENV_IN_PROJECT=true && export PIPENV_PIPFILE=make-venv/Pipfile && export PIPENV_IGNORE_VIRTUALENVS=1 -ENV = -#RUN = $(ENV) && pipenv run -RUN = pipenv run - -# 
---------------------------------------- -# TOP LEVEL TARGETS -# ---------------------------------------- -all: install gen test - -# --------------------------------------- -# We don't want to pollute the python environment with linkml tool specific packages. For this reason, -# we install an isolated instance of linkml in the pipenv-linkml directory -# --------------------------------------- -#install: make-venv/env.lock -install: - - - -# --------------------------------------- -# Test runner -# ---------------------------------------- -test: - pipenv install --dev - pipenv run python -m unittest - -# --------------------------------------- -# GEN: run generator for each target -# --------------------------------------- -gen: $(patsubst %,gen-%,$(TGTS)) - -# --------------------------------------- -# CLEAN: clear out all of the targets -# --------------------------------------- -clean: - rm -rf target/* -.PHONY: clean - -# --------------------------------------- -# SQUEAKY_CLEAN: remove all of the final targets to make sure we don't leave old artifacts around -# --------------------------------------- -squeaky-clean: uninstall clean $(patsubst %,squeaky-clean-%,$(PKG_TGTS)) - find docs/* ! -name 'README.*' -exec rm -rf {} + - find $(PKG_DIR) -name "*.py" ! -name "__init__.py" ! -name "linkml_files.py" -exec rm -f {} + - -squeaky-clean-%: clean - find $(PKG_DIR)/$* ! -name model ! -name 'README.*' ! 
-name $* -type f -exec rm -f {} + - -# --------------------------------------- -# T: List files to generate -# --------------------------------------- -t: - echo $(SCHEMA_NAMES) - -# --------------------------------------- -# ECHO: List all targets -# --------------------------------------- -echo: - echo $(patsubst %,gen-%,$(TGTS)) - -tdir-%: - rm -rf target/$* - mkdir -p target/$* - -# --------------------------------------- -# MARKDOWN DOCS -# Generate documentation ready for mkdocs -# --------------------------------------- -gen-docs: docs/index.html -.PHONY: gen-docs - -docs/index.html: target/docs/index.md install - echo "HERE!" $(PKG_T_DOCS) - mkdir -p $(PKG_T_DOCS) - cp -R $(MODEL_DOCS_DIR)/*.md target/docs - # mkdocs.yml moves from the target/docs to the docs directory - $(RUN) mkdocs build - -target/docs/index.md: $(SCHEMA_DIR)/$(SCHEMA_NAME).yaml tdir-docs install - $(RUN) gen-markdown $(GEN_OPTS) --mergeimports --notypesdir --warnonexist --dir target/docs $< && \ - cp -pr specification target/docs/ - - -# --------------------------------------- -# PYTHON Source -# --------------------------------------- -gen-python: $(patsubst %, $(PKG_T_PYTHON)/%.py, $(SCHEMA_NAMES)) -$(PKG_T_PYTHON)/%.py: target/python/%.py - mkdir -p $(PKG_T_PYTHON) - cp $< $@ -target/python/%.py: $(SCHEMA_DIR)/%.yaml tdir-python install - $(RUN) gen-python $(GEN_OPTS) --genmeta --no-slots --no-mergeimports $< > $@ - -# --------------------------------------- -# GRAPHQL Source -# --------------------------------------- -gen-graphql: $(PKG_T_GRAPHQL)/$(SCHEMA_NAME).graphql -.PHONY: gen-graphql - -$(PKG_T_GRAPHQL)/%.graphql: target/graphql/%.graphql - mkdir -p $(PKG_T_GRAPHQL) - cp $< $@ - -target/graphql/%.graphql: $(SCHEMA_DIR)/%.yaml tdir-graphql install - $(RUN) gen-graphql $(GEN_OPTS) $< > $@ - -# --------------------------------------- -# JSON Schema -# --------------------------------------- -gen-jsonschema: $(patsubst %, $(PKG_T_JSON_SCHEMA)/%.schema.json, $(SCHEMA_NAMES)) 
-.PHONY: gen-jsonschema - -$(PKG_T_JSON_SCHEMA)/%.schema.json: target/json_schema/%.schema.json - mkdir -p $(PKG_T_JSON_SCHEMA) - cp $< $@ - -target/json_schema/%.schema.json: $(SCHEMA_DIR)/%.yaml tdir-json_schema install - $(RUN) gen-json-schema $(GEN_OPTS) -t schema_definition $< > $@ - -# --------------------------------------- -# ShEx -# --------------------------------------- -gen-shex: $(patsubst %, $(PKG_T_SHEX)/%.shex, $(SCHEMA_NAMES)) $(patsubst %, $(PKG_T_SHEX)/%.shexj, $(SCHEMA_NAMES)) -.PHONY: gen-shex - -$(PKG_T_SHEX)/%.shex: target/shex/%.shex - mkdir -p $(PKG_T_SHEX) - cp $< $@ -$(PKG_T_SHEX)/%.shexj: target/shex/%.shexj - mkdir -p $(PKG_T_SHEX) - cp $< $@ - -target/shex/%.shex: $(SCHEMA_DIR)/%.yaml tdir-shex install - $(RUN) gen-shex --no-mergeimports $(GEN_OPTS) $< > $@ -target/shex/%.shexj: $(SCHEMA_DIR)/%.yaml tdir-shex install - $(RUN) gen-shex --no-mergeimports $(GEN_OPTS) -f json $< > $@ - -# --------------------------------------- -# OWL -# --------------------------------------- -gen-owl: $(PKG_T_OWL)/$(SCHEMA_NAME).owl.ttl -.PHONY: gen-owl - -$(PKG_T_OWL)/%.owl.ttl: target/owl/%.owl.ttl - mkdir -p $(PKG_T_OWL) - cp $< $@ -target/owl/%.owl.ttl: $(SCHEMA_DIR)/%.yaml tdir-owl install - $(RUN) gen-owl $(GEN_OPTS) $< > $@ - -# --------------------------------------- -# JSON-LD Context -# --------------------------------------- -gen-jsonld: $(patsubst %, $(PKG_T_JSONLD_CONTEXT)/%.context.jsonld, $(SCHEMA_NAMES)) $(patsubst %, $(PKG_T_JSONLD_CONTEXT)/%.model.context.jsonld, $(SCHEMA_NAMES)) -.PHONY: gen-jsonld - -$(PKG_T_JSONLD_CONTEXT)/%.context.jsonld: target/jsonld/%.context.jsonld - mkdir -p $(PKG_T_JSONLD_CONTEXT) - cp $< $@ - -$(PKG_T_JSONLD_CONTEXT)/%.model.context.jsonld: target/jsonld/%.model.context.jsonld - mkdir -p $(PKG_T_JSONLD_CONTEXT) - cp $< $@ - -target/jsonld/%.context.jsonld: $(SCHEMA_DIR)/%.yaml tdir-jsonld install - $(RUN) gen-jsonld-context $(GEN_OPTS) --no-mergeimports $< > $@ - -target/jsonld/%.model.context.jsonld: 
$(SCHEMA_DIR)/%.yaml tdir-jsonld install - $(RUN) gen-jsonld-context $(GEN_OPTS) --no-mergeimports $< > $@ - -# --------------------------------------- -# Plain Old (PO) JSON -# --------------------------------------- -gen-json: $(patsubst %, $(PKG_T_JSON)/%.json, $(SCHEMA_NAMES)) -.PHONY: gen-json - -$(PKG_T_JSON)/%.json: target/json/%.json - mkdir -p $(PKG_T_JSON) - cp $< $@ -target/json/%.json: $(SCHEMA_DIR)/%.yaml tdir-json install - $(RUN) gen-jsonld $(GEN_OPTS) --no-mergeimports $< > $@ - -# --------------------------------------- -# RDF -# --------------------------------------- -gen-rdf: gen-jsonld $(patsubst %, $(PKG_T_RDF)/%.ttl, $(SCHEMA_NAMES)) $(patsubst %, $(PKG_T_RDF)/%.model.ttl, $(SCHEMA_NAMES)) -.PHONY: gen-rdf - -$(PKG_T_RDF)/%.ttl: target/rdf/%.ttl - mkdir -p $(PKG_T_RDF) - cp $< $@ -$(PKG_T_RDF)/%.model.ttl: target/rdf/%.model.ttl - mkdir -p $(PKG_T_RDF) - cp $< $@ - -target/rdf/%.ttl: $(SCHEMA_DIR)/%.yaml $(PKG_DIR)/jsonld/%.context.jsonld tdir-rdf install - $(RUN) gen-rdf $(GEN_OPTS) --context $(realpath $(word 2,$^)) $< > $@ -target/rdf/%.model.ttl: $(SCHEMA_DIR)/%.yaml $(PKG_DIR)/jsonld/%.model.context.jsonld tdir-rdf install - $(RUN) gen-rdf $(GEN_OPTS) --context $(realpath $(word 2,$^)) $< > $@ - -# --------------------------------------- -# SQLDDL -# --------------------------------------- -gen-sqlddl: $(PKG_T_SQLDDL)/$(SCHEMA_NAME).sql -.PHONY: gen-sqlddl - -$(PKG_T_SQLDDL)/%.sql: target/sqlddl/%.sql - mkdir -p $(PKG_T_SQLDDL) - cp $< $@ -target/sqlddl/%.sql: $(SCHEMA_DIR)/%.yaml tdir-sqlddl install - $(RUN) gen-sqlddl $(GEN_OPTS) $< > $@ - -# test docs locally. 
-docserve: gen-docs - $(RUN) mkdocs serve - -# --------------------------------------- -# VALIDATION -# --------------------------------------- -EXAMPLES = relational-roles rules slot-group path unique-key inlining-union - -all-validate: $(patsubst %, validate-%, $(EXAMPLES)) -validate-%: examples/%-example.yaml - $(RUN) linkml-validate -C SchemaDefinition -s linkml_model/model/schema/meta.yaml $< - - -TITLE = "LinkML Specification" -SPEC = target/SPECIFICATION.md -target/0%.md: specification/0%.md - (cat $< && echo) > $@.tmp && mv $@.tmp $@ - -$(SPEC): $(wildcard specification/0*.md) - cat specification/0*.md > $@.tmp && mv $@.tmp $@ -SPECIFICATION.pdf: $(SPEC) - pandoc -T $(TITLE) --pdf-engine=xelatex --toc -s $< -o $@ -SPECIFICATION.html: $(SPEC) - pandoc --metadata pagetitle=$(TITLE) -f gfm --toc -s $< -o $@ diff --git a/Pipfile b/Pipfile index 73a06c92..e65a4bad 100644 --- a/Pipfile +++ b/Pipfile @@ -4,13 +4,12 @@ verify_ssl = true name = "pypi" [packages] -linkml-runtime = "*" -linkml = ">= 1.2.0" +linkml-runtime = ">= 1.2.22" mkdocs = "*" mkdocs-mermaid2-plugin = "*" [dev-packages] -linkml = "*" +linkml = ">= 1.2.0" mkdocs = "*" [pipenv] diff --git a/about.yaml b/about.yaml new file mode 100644 index 00000000..b45f348f --- /dev/null +++ b/about.yaml @@ -0,0 +1,3 @@ +name: linkml-model +description: LinkML metamodel +source_schema_path: linkml_model/model/meta.yaml diff --git a/examples/dynamic-enums-example.yaml b/examples/dynamic-enums-example.yaml index 8aba0ac8..a62f3dc1 100644 --- a/examples/dynamic-enums-example.yaml +++ b/examples/dynamic-enums-example.yaml @@ -102,7 +102,7 @@ enums: - https://build.fhir.org/valueset-example-intensional.json.html include: - reachable_from: - source_ontology: bioregistry:loinc + source_ontology: "loinc:" source_nodes: - loinc:LP43571-6 is_direct: true @@ -126,6 +126,15 @@ enums: concepts: - LOINC:5932-9 + BodyPartEnum: + reachable_from: + source_ontology: obo:cl + source_nodes: + - CL:0000540 ## neuron + include_self: 
false + relationship_types: + - rdfs:subClassOf + Brand: enum_uri: wikidata:Q431289 include: @@ -145,9 +154,11 @@ enums: code_set_version: "1.0.0" pv_formula: CODE include: - - LP43571-6 + - concepts: + - LP43571-6 minus: - - 5932-9 + - concepts: + - 5932-9 reachable_from: source_ontology: http://loinc.org source_nodes: @@ -157,4 +168,4 @@ enums: include_self: true traverse_up: false concepts: - - http://loinc.org/LP43571-6 \ No newline at end of file + - http://loinc.org/LP43571-6 diff --git a/linkml_model/model/schema/meta.yaml b/linkml_model/model/schema/meta.yaml index e9f3f807..afb695a1 100644 --- a/linkml_model/model/schema/meta.yaml +++ b/linkml_model/model/schema/meta.yaml @@ -4,7 +4,10 @@ name: meta description: |- The metamodel for schemas defined using the Linked Data Modeling Language framework. - For more information on LinkML, see [linkml.io](https://linkml.io) + For more information on LinkML: + + * [linkml.io](https://linkml.io) main website + * [specification](https://linkml.io/linkml-model/docs/specification/) Core metaclasses: @@ -72,16 +75,37 @@ imports: - linkml:units subsets: - owl: - description: Set of slots that appear in the OWL representation of a model - minimal: - description: Minimal set of slots for defining a model - basic: - description: An extension of minimal that is a basic subset that can be implemented by a broad variety of tools - relational_model: - description: The set of constructs that have an equivalent in the classic relational mode as defined by Codd - object_oriented: - description: The set of constructs that have an equivalent in a minimal object oriented metamodel + SpecificationSubset: + title: specification subset + description: |- + A subset that includes all the metamodel elements that form part of the normative LinkML specification + OwlProfile: + title: owl profile + description: |- + A profile that includes all the metamodel elements whose semantics can be expressed in OWL + MinimalSubset: + title: minimal 
subset + description: |- + The absolute minimal set of elements necessary for defining a minimal schema + BasicSubset: + title: basic subset + description: |- + An extension of MinimalSubset that avoids advanced constructs and can be implemented by a broad variety of tools + RelationalModelProfile: + title: relational model profile + description: |- + A profile that includes all the metamodel elements whose semantics can be expressed using the classic Relational Model. + The Relational Model excludes collections (multivalued slots) as first class entities. Instead, these must be + mapped to backreferences + + The classic Relational Model excludes inheritance and polymorphism -- these must be rolled down to + concrete classes or otherwise transformed + + ObjectOrientedProfile: + title: object oriented profile + description: |- + A profile that includes all the metamodel elements whose semantics can be expressed using a minimal + implementation of the object oriented metamodel as employed by languages such as Java and Python #================================== # Slots # @@ -92,6 +116,7 @@ slots: # Common metadata shared by all elements # ----------------------------------- name: + rank: 1 aliases: - short name - unique name @@ -102,33 +127,33 @@ slots: form the globally unique subject of the target class. slot_uri: rdfs:label in_subset: - - owl - - minimal - - basic - - relational_model - - object_oriented + - SpecificationSubset + - OwlProfile + - MinimalSubset + - BasicSubset + - RelationalModelProfile + - ObjectOrientedProfile exact_mappings: - schema:name see_also: - https://en.wikipedia.org/wiki/Data_element_name title: + rank: 3 aliases: - long name domain: element description: the official title of the element slot_uri: dcterms:title in_subset: - - owl - - basic + - BasicSubset conforms_to: domain: element description: An established standard to which the element conforms. 
slot_uri: dcterms:conformsTo in_subset: - - owl - - basic + - BasicSubset categories: range: uriorcurie @@ -138,7 +163,7 @@ slots: description: >- controlled terms used to categorize an element in_subset: - - basic + - BasicSubset keywords: domain: element @@ -148,7 +173,7 @@ slots: range: string multivalued: true in_subset: - - basic + - BasicSubset definition_uri: domain: element @@ -172,9 +197,11 @@ slots: - https://github.com/linkml/linkml/issues/194 - https://github.com/linkml/linkml-model/issues/28 in_subset: - - basic + - SpecificationSubset + - BasicSubset description: + rank: 5 aliases: - definition domain: element @@ -185,7 +212,7 @@ slots: - schema:description recommended: true in_subset: - - basic + - BasicSubset structured_aliases: slot_uri: skosxl:altLabel @@ -202,13 +229,14 @@ slots: - alternative labels - designations domain: element + description: Alternate names for the element range: string multivalued: true slot_uri: skos:altLabel exact_mappings: - schema:alternateName in_subset: - - basic + - BasicSubset deprecated: domain: element @@ -217,7 +245,7 @@ slots: close_mappings: - owl:deprecated in_subset: - - basic + - BasicSubset todos: domain: element @@ -225,7 +253,7 @@ slots: description: Outstanding issue that needs resolution multivalued: true in_subset: - - basic + - BasicSubset notes: domain: element @@ -233,8 +261,7 @@ slots: description: editorial notes about an element intended for internal consumption slot_uri: skos:editorialNote in_subset: - - owl - - basic + - BasicSubset comments: domain: element @@ -244,8 +271,7 @@ slots: exact_mappings: - rdfs:comment in_subset: - - owl - - basic + - BasicSubset in_subset: domain: element @@ -257,7 +283,7 @@ slots: translator knowledge graph) slot_uri: OIO:inSubset in_subset: - - basic + - BasicSubset from_schema: domain: element @@ -267,6 +293,8 @@ slots: notes: - A stronger model would be range schema_definition, but this doesn't address the import model slot_uri: skos:inScheme + in_subset: + - 
SpecificationSubset imported_from: domain: element @@ -281,8 +309,7 @@ slots: description: a reference slot_uri: rdfs:seeAlso in_subset: - - owl - - basic + - BasicSubset owned_by: aliases: @@ -294,7 +321,7 @@ slots: close_mappings: - rdfs:isDefinedBy in_subset: - - basic + - BasicSubset created_by: domain: element @@ -302,7 +329,7 @@ slots: description: agent that created the element slot_uri: pav:createdBy in_subset: - - basic + - BasicSubset created_on: domain: element @@ -310,7 +337,7 @@ slots: description: time at which the element was created slot_uri: pav:createdOn in_subset: - - basic + - BasicSubset last_updated_on: domain: element @@ -318,7 +345,7 @@ slots: description: time at which the element was last updated slot_uri: pav:lastUpdatedOn in_subset: - - basic + - BasicSubset modified_by: domain: element @@ -326,7 +353,7 @@ slots: description: agent that modified the element slot_uri: oslc:modifiedBy in_subset: - - basic + - BasicSubset status: aliases: @@ -341,7 +368,7 @@ slots: - https://www.hl7.org/fhir/valueset-publication-status.html ## Draft, Active, Retired, Unknown - https://www.hl7.org/fhir/versions.html#std-process ## Draft, Trial Use, Normative, Informative, Deprecated in_subset: - - basic + - BasicSubset literal_form: aliases: @@ -383,62 +410,71 @@ slots: comments: - The described resource may be derived from the related resource in whole or in part in_subset: - - basic + - BasicSubset # -------------------------------------------------------------- # Slots that are common to slot definition and class definition # -------------------------------------------------------------- is_a: + rank: 11 domain: definition range: definition abstract: true description: >- - specifies single-inheritance between classes or slots. + A primary parent class or slot from which inheritable metaslots are propagated from. While multiple inheritance is not allowed, mixins can be provided effectively providing the same thing. 
The semantics are the same when translated to formalisms that allow MI (e.g. RDFS/OWL). When translating to a SI framework (e.g. java classes, python classes) then is a is used. When translating a framework without polymorphism (e.g. json-schema, solr document schema) then is a and mixins are recursively unfolded in_subset: - - basic - - object_oriented + - SpecificationSubset + - BasicSubset + - ObjectOrientedProfile + - OwlProfile abstract: domain: definition range: boolean description: >- - an abstract class is a high level class or slot that is typically used to group common slots together and cannot - be directly instantiated. + Indicates the class or slot cannot be directly instantiated and is intended for grouping and specifying core inherited metaslots in_subset: - - basic - - object_oriented + - SpecificationSubset + - BasicSubset + - ObjectOrientedProfile mixin: aliases: - trait domain: definition range: boolean - description: this slot or class can only be used as a mixin. + description: >- + Indicates the class or slot is intended to be inherited from without being an is_a parent. mixins should not be inherited from using is_a, except by other mixins. see_also: - https://en.wikipedia.org/wiki/Mixin in_subset: - - basic - - object_oriented + - SpecificationSubset + - BasicSubset + - ObjectOrientedProfile mixins: + rank: 13 aliases: - traits domain: definition multivalued: true range: definition - description: List of definitions to be mixed in. Targets may be any definition of the same type + description: >- + A collection of secondary parent classes or slots from which inheritable metaslots are propagated from. comments: - mixins act in the same way as parents (is_a). 
They allow a model to have a primary strict hierachy, while keeping the benefits of multiple inheritance see_also: - https://en.wikipedia.org/wiki/Mixin in_subset: - - basic - - object_oriented + - SpecificationSubset + - BasicSubset + - ObjectOrientedProfile + - OwlProfile apply_to: domain: definition @@ -473,7 +509,8 @@ slots: todos: - this should be able to connect to an existing code set from TCCM in_subset: - - basic + - SpecificationSubset + - BasicSubset code_set_version: domain: enum_expression @@ -483,7 +520,7 @@ slots: comments: - we assume that version identifiers lexically sort in temporal order. Recommend semver when possible in_subset: - - basic + - BasicSubset code_set_tag: domain: enum_expression @@ -493,7 +530,7 @@ slots: comments: - enum_expression cannot have both a code_set_tag and a code_set_version in_subset: - - basic + - BasicSubset pv_formula: domain: enum_expression @@ -504,7 +541,8 @@ slots: - you cannot have BOTH the permissible_values and permissible_value_formula tag - code_set must be supplied for this to be valid in_subset: - - basic + - SpecificationSubset + - BasicSubset permissible_values: domain: enum_expression @@ -514,29 +552,35 @@ slots: description: >- A list of possible values for a slot range in_subset: - - basic + - SpecificationSubset + - BasicSubset enum_uri: aliases: - public ID domain: enum_definition range: uriorcurie - description: URI of the enum in an RDF environment + description: URI of the enum that provides a semantic interpretation of the element in a linked data context. 
The URI may come from any namespace and may be shared between schemas ifabsent: class_curie in_subset: - - basic + - SpecificationSubset + - BasicSubset include: description: An enum expression that yields a list of permissible values that are to be included, after subtracting the minus set domain: enum_expression range: anonymous_enum_expression multivalued: true + in_subset: + - SpecificationSubset minus: description: An enum expression that yields a list of permissible values that are to be subtracted from the enum domain: enum_expression range: anonymous_enum_expression multivalued: true + in_subset: + - SpecificationSubset inherits: description: An enum definition that is used as the basis to create a new enum @@ -545,27 +589,37 @@ slots: domain: enum_expression range: enum_definition multivalued: true + in_subset: + - SpecificationSubset matches: description: Specifies a match query that is used to calculate the list of permissible values domain: enum_expression range: match_query + in_subset: + - SpecificationSubset identifier_pattern: description: A regular expression that is used to obtain a set of identifiers from a source_ontology to construct a set of permissible values domain: match_query range: string + in_subset: + - SpecificationSubset concepts: description: A list of identifiers that are used to construct a set of permissible values domain: enum_expression range: uriorcurie multivalued: true + in_subset: + - SpecificationSubset reachable_from: description: Specifies a query for obtaining a list of permissible values based on graph reachability domain: enum_expression range: reachability_query + in_subset: + - SpecificationSubset source_ontology: aliases: @@ -573,6 +627,8 @@ slots: - vocabulary description: An ontology or vocabulary or terminology that is used in a query to obtain a set of permissible values range: uriorcurie + in_subset: + - SpecificationSubset is_direct: aliases: @@ -580,6 +636,8 @@ slots: description: True if the reachability 
query should only include directly related nodes, if False then include also transitively connected domain: reachability_query range: boolean + in_subset: + - SpecificationSubset traverse_up: aliases: @@ -587,6 +645,8 @@ slots: description: True if the direction of the reachability query is reversed and ancestors are retrieved domain: reachability_query range: boolean + in_subset: + - SpecificationSubset include_self: aliases: @@ -594,6 +654,8 @@ slots: description: True if the query is reflexive domain: reachability_query range: boolean + in_subset: + - SpecificationSubset relationship_types: description: A list of relationship types (properties) that are used in a reachability query @@ -603,26 +665,33 @@ slots: domain: reachability_query range: uriorcurie multivalued: true + in_subset: + - SpecificationSubset source_nodes: description: A list of nodes that are used in the reachability query domain: reachability_query range: uriorcurie multivalued: true + in_subset: + - SpecificationSubset # ----------------------------------- # Slots for permissible_value # ----------------------------------- text: + rank: 21 aliases: - value domain: permissible_value range: string identifier: true in_subset: - - basic + - SpecificationSubset + - BasicSubset meaning: + rank: 23 aliases: - PV meaning domain: permissible_value @@ -632,7 +701,8 @@ slots: notes: - we may want to change the range of this (and other) elements in the model to an entitydescription type construct in_subset: - - basic + - SpecificationSubset + - BasicSubset see_also: - https://en.wikipedia.org/wiki/ISO/IEC_11179 @@ -640,13 +710,16 @@ slots: # Schema definition slots # ----------------------------------- id: + rank: 0 domain: schema_definition range: uri description: The official schema URI required: true in_subset: - - minimal - - basic + - SpecificationSubset + - MinimalSubset + - BasicSubset + - OwlProfile emit_prefixes: domain: schema_definition @@ -663,16 +736,18 @@ slots: exact_mappings: - 
schema:schemaVersion in_subset: - - owl - - basic + - BasicSubset imports: + rank: 21 domain: schema_definition range: uriorcurie multivalued: true description: other schemas that are included in this schema in_subset: - - basic + - SpecificationSubset + - BasicSubset + - OwlProfile structured_imports: domain: schema_definition @@ -681,107 +756,122 @@ slots: description: other schemas that are included in this schema license: + rank: 31 domain: schema_definition description: license for the schema slot_uri: dcterms:license in_subset: - - owl - - basic + - BasicSubset default_curi_maps: domain: schema_definition multivalued: true description: ordered list of prefixcommon biocontexts to be fetched to resolve id prefixes and inline prefix variables in_subset: - - basic + - BasicSubset default_prefix: + rank: 11 domain: schema_definition range: string ifabsent: default_ns description: default and base prefix -- used for ':' identifiers, @base and @vocab in_subset: - - minimal - - basic + - SpecificationSubset + - MinimalSubset + - BasicSubset default_range: + rank: 13 domain: schema_definition range: type_definition description: default slot range to be used if range element is omitted from a slot definition notes: - restricted to type definitions for the time being. 
Could be broadened if required in_subset: - - minimal - - basic + - SpecificationSubset + - MinimalSubset + - BasicSubset subsets: + rank: 8 domain: schema_definition multivalued: true range: subset_definition inlined: true - description: list of subsets referenced in this model + description: An index to the collection of all subset definitions in the schema exact_mappings: - OIO:hasSubset in_subset: - - basic + - SpecificationSubset + - BasicSubset types: + rank: 6 domain: schema_definition range: type_definition multivalued: true inlined: true - description: data types used in the model + description: An index to the collection of all type definitions in the schema in_subset: - - basic - - object_oriented + - BasicSubset + - ObjectOrientedProfile + - OwlProfile enums: + rank: 5 domain: schema_definition range: enum_definition multivalued: true inlined: true - description: enumerated ranges + description: An index to the collection of all enum definitions in the schema in_subset: - - basic - - object_oriented + - SpecificationSubset + - BasicSubset + - ObjectOrientedProfile + - OwlProfile slot_definitions: + rank: 4 domain: schema_definition multivalued: true range: slot_definition inlined: true alias: slots - description: slot definitions + description: An index to the collection of all slot definitions in the schema in_subset: - - basic + - SpecificationSubset + - BasicSubset + - OwlProfile classes: + rank: 3 domain: schema_definition range: class_definition multivalued: true inlined: true - description: class definitions + description: An index to the collection of all class definitions in the schema in_subset: - - minimal - - basic - - relational_model - - object_oriented + - SpecificationSubset + - MinimalSubset + - BasicSubset + - RelationalModelProfile + - ObjectOrientedProfile + - OwlProfile metamodel_version: domain: schema_definition readonly: supplied by the schema loader description: Version of the metamodel used to load the schema in_subset: - - owl 
- - basic + - BasicSubset source_file: domain: schema_definition readonly: suppplied by the schema loader description: name, uri or description of the source of the schema in_subset: - - owl - - basic + - BasicSubset source_file_date: domain: schema_definition @@ -789,8 +879,7 @@ slots: readonly: supplied by the loader description: modification date of the source of the schema in_subset: - - owl - - basic + - BasicSubset source_file_size: domain: schema_definition @@ -798,8 +887,7 @@ slots: readonly: supplied by the schema loader description: size in bytes of the source of the schema in_subset: - - owl - - basic + - BasicSubset generation_date: domain: schema_definition @@ -807,18 +895,18 @@ slots: readonly: supplied by the schema loader description: date and time that the schema was loaded/generated in_subset: - - owl - - basic + - BasicSubset # ----------------------------------- # Class definition slots # ----------------------------------- slots: + rank: 19 domain: class_definition multivalued: true range: slot_definition - description: list of slot names that are applicable to a class + description: collection of slot names that are applicable to a class comments: - >- This defines the set of slots that are allowed to be used for a given class. @@ -826,14 +914,16 @@ slots: combination of the parent (is a) slots, mixins slots, apply to slots minus the slot usage entries. Formally, in_subset: - - basic + - SpecificationSubset + - BasicSubset slot_usage: + rank: 23 domain: class_definition multivalued: true range: slot_definition inlined: true - description: the redefinition of a slot in the context of the containing class definition. + description: the refinement of a slot in the context of the containing class definition. comments: - >- Many slots may be re-used across different classes, but the meaning of the slot may be refined by context. @@ -841,11 +931,14 @@ slots: minimal constraints. When this is subclasses, e.g. 
to disease-phenotype associations then slot usage may specify both local naming (e.g. subject=disease) and local constraints in_subset: - - basic + - SpecificationSubset + - BasicSubset enum_range: range: enum_expression description: An inlined enumeration + in_subset: + - SpecificationSubset range_expression: domain: slot_expression @@ -853,6 +946,8 @@ slots: description: A range that is described as a boolean expression combining existing ranges comments: - one use for this is being able to describe a range using any_of expressions, for example to combine two enums + in_subset: + - SpecificationSubset boolean_slot: abstract: true @@ -861,34 +956,47 @@ slots: description: A grouping of slots that expression a boolean operator over a list of operands any_of: + rank: 101 description: holds if at least one of the expressions hold is_a: boolean_slot range: expression exact_mappings: - sh:or + in_subset: + - SpecificationSubset exactly_one_of: + rank: 103 description: holds if only one of the expressions hold is_a: boolean_slot range: expression exact_mappings: - sh:xone + in_subset: + - SpecificationSubset none_of: + rank: 105 description: holds if none of the expressions hold is_a: boolean_slot range: expression exact_mappings: - sh:not + in_subset: + - SpecificationSubset all_of: + rank: 107 description: holds if all of the expressions hold is_a: boolean_slot range: expression exact_mappings: - sh:and + in_subset: + - SpecificationSubset preconditions: + rank: 111 aliases: - if - body @@ -899,8 +1007,11 @@ slots: inlined: true close_mappings: - swrl:body + in_subset: + - SpecificationSubset postconditions: + rank: 113 aliases: - then - head @@ -910,22 +1021,36 @@ slots: inlined: true close_mappings: - swrl:body + in_subset: + - SpecificationSubset + elseconditions: + rank: 115 aliases: - else description: an expression that must hold for an instance of the class, if the preconditions no not hold range: anonymous_class_expression inlined: true + in_subset: + - 
SpecificationSubset + bidirectional: aliases: - iff - if and only if description: in addition to preconditions entailing postconditions, the postconditions entail the preconditions range: boolean + in_subset: + - SpecificationSubset + open_world: description: if true, the the postconditions may be omitted in instance data, but it is valid for an inference engine to add these range: boolean + in_subset: + - SpecificationSubset + rank: + rank: 51 aliases: - order - precedence @@ -935,10 +1060,12 @@ slots: - the rank of an element does not affect the semantics slot_uri: sh:order range: integer - in_subset: - - basic exact_mappings: - qudt:order + in_subset: + - SpecificationSubset + - BasicSubset + deactivated: description: a deactivated rule is not executed by the rules engine slot_uri: sh:deactivated @@ -951,6 +1078,8 @@ slots: range: class_rule inlined: true description: the collection of rules that apply to all members of this class + in_subset: + - SpecificationSubset classification_rules: domain: class_definition @@ -958,6 +1087,8 @@ slots: range: anonymous_class_expression inlined: true description: the collection of classification rules that apply to all members of this class + in_subset: + - SpecificationSubset slot_conditions: domain: class_expression @@ -965,8 +1096,11 @@ slots: range: slot_definition inlined: true description: expresses constraints on a group of slots for a class expression + in_subset: + - SpecificationSubset attributes: + rank: 29 domain: class_definition multivalued: true range: slot_definition @@ -977,20 +1111,24 @@ slots: attributes are an alternative way of defining new slots. An attribute adds a slot to the global space in the form __ (lower case, double underscores). Attributes can be specialized via slot_usage. 
in_subset: - - minimal - - basic - - relational_model - - object_oriented + - SpecificationSubset + - MinimalSubset + - BasicSubset + - RelationalModelProfile + - ObjectOrientedProfile + - OwlProfile class_uri: + rank: 2 aliases: - public ID domain: class_definition range: uriorcurie - description: URI of the class in an RDF environment + description: URI of the class that provides a semantic interpretation of the element in a linked data context. The URI may come from any namespace and may be shared between schemas ifabsent: class_curie in_subset: - - basic + - SpecificationSubset + - BasicSubset subclass_of: domain: class_definition @@ -1015,27 +1153,33 @@ slots: description: indicates that the domain element consists exactly of the members of the element in the range. notes: - this only applies in the OWL generation + in_subset: + - SpecificationSubset + - OwlProfile tree_root: + rank: 31 domain: class_definition range: boolean description: indicator that this is the root class in tree structures notes: - each schema should have at most one tree root in_subset: - - basic + - SpecificationSubset + - BasicSubset unique_keys: domain: class_definition range: unique_key multivalued: true inlined: true - description: Set of unique keys for this slot + description: A collection of unique keys for this class. Unique keys may be singular or compound. 
exact_mappings: - owl:hasKey in_subset: - - basic - - relational_model + - SpecificationSubset + - BasicSubset + - RelationalModelProfile unique_key_name: domain: unique_key @@ -1043,8 +1187,9 @@ slots: required: true description: name of the unique key in_subset: - - basic - - relational_model + - SpecificationSubset + - BasicSubset + - RelationalModelProfile unique_key_slots: domain: unique_key @@ -1053,8 +1198,9 @@ slots: range: slot_definition description: list of slot names that form a key in_subset: - - basic - - relational_model + - SpecificationSubset + - BasicSubset + - RelationalModelProfile slot_names_unique: domain: definition @@ -1079,6 +1225,8 @@ slots: S1: Y implicitly asserts that X is an instance of C1 + in_subset: + - SpecificationSubset range: aliases: @@ -1100,12 +1248,14 @@ slots: implicitly asserts Y is an instance of C2 in_subset: - - minimal - - basic - - relational_model - - object_oriented + - SpecificationSubset + - MinimalSubset + - BasicSubset + - RelationalModelProfile + - ObjectOrientedProfile slot_uri: + rank: 2 aliases: - public ID domain: slot_definition @@ -1113,18 +1263,21 @@ slots: ifabsent: slot_curie description: predicate of this slot for semantic web application in_subset: - - basic + - SpecificationSubset + - BasicSubset multivalued: + rank: 7 domain: slot_definition range: boolean inherited: true description: true means that slot can have more than one value # ifabsent: False in_subset: - - minimal - - basic - - object_oriented + - SpecificationSubset + - MinimalSubset + - BasicSubset + - ObjectOrientedProfile inherited: domain: slot_definition @@ -1133,6 +1286,8 @@ slots: description: true means that the *value* of a slot is inherited by subclasses comments: - Inherited applies to slot values. 
Parent *slots* are always inherited by subclasses + in_subset: + - SpecificationSubset readonly: domain: slot_definition @@ -1158,12 +1313,16 @@ slots: * class_uri -- URI for the containing class close_mappings: - sh:defaultValue + in_subset: + - SpecificationSubset implicit_prefix: domain: slot_expression range: string description: >- Causes the slot value to be interpreted as a uriorcurie after prefixing with this string + in_subset: + - SpecificationSubset value_specification_constant: abstract: true @@ -1190,7 +1349,8 @@ slots: range: string inherited: true description: the slot must have range string and the value of the slot must equal the specified value - status: testing + in_subset: + - SpecificationSubset equals_number: is_a: list_value_specification_constant @@ -1210,7 +1370,8 @@ slots: - specify expression language see_also: - https://github.com/linkml/linkml/issues/75 - status: unstable + in_subset: + - SpecificationSubset minimum_cardinality: is_a: list_value_specification_constant @@ -1218,6 +1379,8 @@ slots: inherited: true description: the minimum number of entries for a multivalued slot status: testing + in_subset: + - SpecificationSubset maximum_cardinality: is_a: list_value_specification_constant @@ -1225,6 +1388,8 @@ slots: inherited: true description: the maximum number of entries for a multivalued slot status: testing + in_subset: + - SpecificationSubset equals_string_in: is_a: list_value_specification_constant @@ -1233,6 +1398,8 @@ slots: inherited: true description: the slot must have range string and the value of the slot must equal one of the specified values status: testing + in_subset: + - SpecificationSubset equals_number_in: is_a: list_value_specification_constant @@ -1241,6 +1408,8 @@ slots: inherited: true description: the slot must have range number and the value of the slot must equal one of the specified values status: testing + in_subset: + - SpecificationSubset has_member: is_a: list_value_specification_constant @@ -1249,6 
+1418,8 @@ slots: inlined: true description: the values of the slot is multivalued with at least one member satisfying the condition status: testing + in_subset: + - SpecificationSubset all_members: is_a: list_value_specification_constant @@ -1268,26 +1439,31 @@ slots: maximum_value: 10 ``` status: testing + in_subset: + - SpecificationSubset singular_name: domain: slot_definition description: a name that is used in the singular form slot_uri: skos:altLabel in_subset: - - basic + - BasicSubset required: + rank: 8 domain: slot_definition range: boolean inherited: true description: true means that the slot must be present in the loaded definition in_subset: - - minimal - - basic - - relational_model - - object_oriented + - SpecificationSubset + - MinimalSubset + - BasicSubset + - RelationalModelProfile + - ObjectOrientedProfile recommended: + rank: 9 domain: slot_definition range: boolean inherited: true @@ -1298,7 +1474,8 @@ slots: see_also: - https://github.com/linkml/linkml/issues/177 in_subset: - - basic + - SpecificationSubset + - BasicSubset inapplicable: domain: slot_definition @@ -1307,8 +1484,11 @@ slots: description: true means that the slot must not be present comments: - By default all slots are applicable. The main use case for this slot is as an override in a subclass + in_subset: + - SpecificationSubset inlined: + rank: 25 domain: slot_definition range: boolean inherited: true @@ -1318,9 +1498,11 @@ slots: comments: - classes without keys or identifiers are necessarily inlined as lists in_subset: - - basic + - SpecificationSubset + - BasicSubset inlined_as_list: + rank: 27 domain: slot_definition range: boolean inherited: true @@ -1336,7 +1518,8 @@ slots: is the key or identifier and whose value is the one additional element. This form is still stored according to the inlined_as_list setting. 
in_subset: - - basic + - SpecificationSubset + - BasicSubset list_elements_ordered: domain: slot_definition @@ -1347,6 +1530,8 @@ slots: order may still be preserved but this is not guaranteed comments: - should only be used with multivalued slots + in_subset: + - SpecificationSubset list_elements_unique: domain: slot_definition @@ -1356,6 +1541,8 @@ slots: If True, then there must be no duplicates in the elements of a multivalued slot comments: - should only be used with multivalued slots + in_subset: + - SpecificationSubset shared: aliases: @@ -1366,6 +1553,8 @@ slots: inherited: true description: >- If True, then the relationship between the slot domain and range is many to one or many to many + in_subset: + - SpecificationSubset key: domain: slot_definition @@ -1380,10 +1569,12 @@ slots: - identifiers and keys are mutually exclusive. A given domain cannot have both - a key slot is automatically required. Keys cannot be optional in_subset: - - basic - - relational_model + - SpecificationSubset + - BasicSubset + - RelationalModelProfile identifier: + rank: 5 domain: slot_definition range: boolean inherited: true @@ -1403,9 +1594,10 @@ slots: - a given domain can have at most one identifier - identifiers and keys are mutually exclusive. A given domain cannot have both in_subset: - - minimal - - basic - - relational_model + - SpecificationSubset + - MinimalSubset + - BasicSubset + - RelationalModelProfile designates_type: aliases: @@ -1415,14 +1607,19 @@ slots: inherited: true description: >- True means that the key slot(s) is used to determine the instantiation (types) relation between objects and a ClassDefinition + in_subset: + - SpecificationSubset alias: + rank: 6 domain: slot_definition range: string slot_uri: skos:prefLabel description: >- the name used for a slot in the context of its owning class. If present, this is used instead of the actual slot name. 
+ in_subset: + - SpecificationSubset owner: domain: slot_definition @@ -1474,6 +1671,8 @@ slots: range: definition multivalued: true description: Two classes are disjoint if they have no instances in common, two slots are disjoint if they can never hold between the same two instances + in_subset: + - SpecificationSubset children_are_mutually_disjoint: domain: definition @@ -1491,6 +1690,8 @@ slots: description: If s is symmetric, and i.s=v, then v.s=i close_mappings: - owl:SymmetricProperty + in_subset: + - SpecificationSubset asymmetric: is_a: relational_logical_characteristic @@ -1499,6 +1700,8 @@ slots: - asymmetry is the combination of antisymmetry and irreflexivity close_mappings: - owl:AsymmetricProperty + in_subset: + - SpecificationSubset reflexive: is_a: relational_logical_characteristic @@ -1507,48 +1710,64 @@ slots: - it is rare for a property to be reflexive, this characteristic is added for completeness, consider instead locally_reflexive close_mappings: - owl:ReflexiveProperty + in_subset: + - SpecificationSubset irreflexive: is_a: relational_logical_characteristic description: If s is irreflexive, then there exists no i such i.s=i close_mappings: - owl:IrreflexiveProperty + in_subset: + - SpecificationSubset locally_reflexive: is_a: relational_logical_characteristic description: If s is locally_reflexive, then i.s=i for all instances i where s if a class slot for the type of i + in_subset: + - SpecificationSubset transitive: is_a: relational_logical_characteristic description: If s is transitive, and i.s=z, and s.s=j, then i.s=j close_mappings: - owl:TransitiveProperty + in_subset: + - SpecificationSubset transitive_form_of: range: slot_definition description: If s transitive_form_of d, then (1) s holds whenever d holds (2) s is transitive (3) d holds whenever s holds and there are no intermediates, and s is not reflexive comments: - Example: ancestor_of is the transitive_form_of parent_of + in_subset: + - SpecificationSubset 
reflexive_transitive_form_of: is_a: transitive_form_of description: transitive_form_of including the reflexive case + in_subset: + - SpecificationSubset inverse: domain: slot_definition range: slot_definition description: indicates that any instance of d s r implies that there is also an instance of r s' d slot_uri: owl:inverseOf + in_subset: + - SpecificationSubset is_class_field: domain: slot_definition range: boolean - description: indicates that any instance, i, the domain of this slot will include an assert of i s range + description: indicates that for any instance, i, the domain of this slot will include an assertion of i s range role: domain: slot_definition range: string - description: the role played by the slot range + description: a textual descriptor that indicates the role played by the slot range + comments: + - the primary use case for this slot is to provide a textual descriptor of a generic slot name when used in the context of a more specific class inherited: true minimum_value: @@ -1559,7 +1778,8 @@ slots: description: for slots with ranges of type number, the value must be equal to or higher than this inherited: true in_subset: - - basic + - SpecificationSubset + - BasicSubset maximum_value: aliases: @@ -1569,37 +1789,48 @@ slots: description: for slots with ranges of type number, the value must be equal to or lowe than this inherited: true in_subset: - - basic + - SpecificationSubset + - BasicSubset interpolated: domain: pattern_expression range: boolean description: if true then the pattern is first string interpolated + in_subset: + - SpecificationSubset partial_match: domain: pattern_expression range: boolean description: if true then the pattern must match the whole string, as if enclosed in ^...$ + in_subset: + - SpecificationSubset pattern: + rank: 35 domain: definition range: string description: the string value of the slot must conform to this regular expression expressed in the string inherited: true in_subset: - - basic + - 
SpecificationSubset + - BasicSubset syntax: domain: pattern_expression range: string description: the string value of the slot must conform to this regular expression expressed in the string. May be interpolated. inherited: true + in_subset: + - SpecificationSubset structured_pattern: domain: definition range: pattern_expression description: the string value of the slot must conform to the regular expression in the pattern expression inherited: true + in_subset: + - SpecificationSubset string_serialization: domain: definition @@ -1622,12 +1853,15 @@ slots: inherited: false see_also: - https://github.com/linkml/issues/128 + in_subset: + - SpecificationSubset # ----------------------------------- # Slots for type definition # ----------------------------------- typeof: + rank: 7 domain: type_definition range: type_definition description: >- @@ -1637,9 +1871,11 @@ slots: - the type definition containing the typeof slot is referred to as the "child type" - type definitions without a typeof slot are referred to as a "root type" in_subset: - - basic + - SpecificationSubset + - BasicSubset base: + rank: 8 domain: type_definition description: python base type that implements this type definition inherited: true @@ -1647,9 +1883,11 @@ slots: - every root type must have a base - the base is inherited by child types but may be overriden. Base compatibility is not checked. 
in_subset: - - basic + - SpecificationSubset + - BasicSubset type_uri: + rank: 2 domain: type_definition range: uriorcurie alias: uri @@ -1659,15 +1897,18 @@ slots: - uri is typically drawn from the set of URI's defined in OWL (https://www.w3.org/TR/2012/REC-owl2-syntax-20121211/#Datatype_Maps) - every root type must have a type uri in_subset: - - basic + - SpecificationSubset + - BasicSubset repr: + rank: 10 domain: type_definition range: string description: the name of the python object that implements this type definition inherited: true in_subset: - - basic + - SpecificationSubset + - BasicSubset # ----------------------------------- # Slots for alt descripion @@ -1679,7 +1920,7 @@ slots: description: text of an attributed description alias: description in_subset: - - basic + - BasicSubset alt_description_source: domain: alt_description @@ -1688,17 +1929,18 @@ slots: alias: source description: the source of an attributed description in_subset: - - basic + - BasicSubset alt_descriptions: aliases: - alternate definitions domain: element + description: A sourced alternative description for an element range: alt_description multivalued: true inlined: true in_subset: - - basic + - BasicSubset # ----------------------------------- # Slots for example @@ -1708,14 +1950,14 @@ slots: description: example value slot_uri: skos:example in_subset: - - basic + - BasicSubset value_description: domain: example alias: description description: description of what the value is doing in_subset: - - basic + - BasicSubset examples: singular_name: example @@ -1727,36 +1969,41 @@ slots: close_mappings: - vann:example in_subset: - - owl - - basic + - BasicSubset # ----------------------------------- # Slots for prefix # ----------------------------------- prefix_prefix: + rank: 1 domain: prefix range: ncname key: true description: the nsname (sans ':' for a given prefix) in_subset: - - basic + - SpecificationSubset + - BasicSubset prefix_reference: + rank: 2 domain: prefix range: uri 
required: true description: A URI associated with a given prefix in_subset: - - basic + - SpecificationSubset + - BasicSubset prefixes: + rank: 10 domain: schema_definition range: prefix multivalued: true inlined: true description: prefix / URI definitions to be added to the context beyond those fetched from prefixcommons in id prefixes in_subset: - - basic + - SpecificationSubset + - BasicSubset # ----------------------------------- # Slots for settings @@ -1766,14 +2013,19 @@ slots: range: ncname key: true description: the variable name for a setting + in_subset: + - SpecificationSubset setting_value: domain: setting range: string required: true description: The value assigned for a setting + in_subset: + - SpecificationSubset settings: + rank: 20 domain: schema_definition range: setting multivalued: true @@ -1781,6 +2033,8 @@ slots: description: A collection of global variable settings comments: - global variables are used in string interpolation + in_subset: + - SpecificationSubset # ----------------------------------- # Slots for import expressions @@ -1831,7 +2085,8 @@ slots: - is_grouping_slot: equals_expression: True in_subset: - - basic + - SpecificationSubset + - BasicSubset is_grouping_slot: domain: slot_definition @@ -1840,7 +2095,8 @@ slots: close_mappings: - sh:PropertyGroup in_subset: - - basic + - SpecificationSubset + - BasicSubset # paths @@ -1926,7 +2182,7 @@ classes: - broad mappings - rank in_subset: - - basic + - BasicSubset element: aliases: @@ -1947,9 +2203,10 @@ classes: see_also: - https://en.wikipedia.org/wiki/Data_element in_subset: - - basic + - BasicSubset schema_definition: + rank: 1 description: "a collection of subset, type, slot and class definitions" tree_root: true aliases: @@ -1984,14 +2241,17 @@ classes: slot_usage: name: range: ncname + description: a unique name for the schema that is both human-readable and consists of only characters from the NCName set # slot_usage: # description: # slot_uri: dcterms:description in_subset: 
- - minimal - - basic - - relational_model - - object_oriented + - SpecificationSubset + - MinimalSubset + - BasicSubset + - RelationalModelProfile + - ObjectOrientedProfile + - OwlProfile type_expression: is_a: expression @@ -2025,9 +2285,10 @@ classes: - type_expression type_definition: + rank: 4 mixins: - type_expression - description: A data type definition. + description: an element that whose instances are atomic scalar values that can be mapped to primitive types is_a: element slots: - typeof @@ -2039,16 +2300,20 @@ classes: union_of: range: type_definition in_subset: - - basic + - SpecificationSubset + - BasicSubset + - OwlProfile subset_definition: - description: the name and description of a subset + rank: 6 + description: an element that can be used to group other metamodel elements is_a: element in_subset: - - basic + - SpecificationSubset + - BasicSubset definition: - description: "base class for definitions" + description: abstract base class for core metaclasses abstract: true is_a: element slots: @@ -2067,7 +2332,7 @@ classes: see_also: - https://en.wikipedia.org/wiki/Data_element_definition in_subset: - - basic + - BasicSubset enum_expression: description: An expression that constrains the range of a slot @@ -2091,12 +2356,13 @@ classes: - enum_expression enum_definition: + rank: 5 aliases: - enum - value set - term set - Terminology Value Set - description: List of values that constrain the range of a slot + description: an element whose instances must be drawn from a specified set of permissible values is_a: definition mixins: - enum_expression @@ -2105,9 +2371,11 @@ classes: close_mappings: - qb:HierarchicalCodeList in_subset: - - basic - - relational_model - - object_oriented + - SpecificationSubset + - BasicSubset + - RelationalModelProfile + - ObjectOrientedProfile + - OwlProfile slots: - enum_uri @@ -2116,6 +2384,8 @@ classes: slots: - identifier_pattern - source_ontology + in_subset: + - SpecificationSubset reachability_query: description: 
A query that is used on an enum expression to dynamically obtain a set of permissible values via walking from a set of source nodes to a set of descendants or ancestors over a set of relationship types @@ -2126,7 +2396,8 @@ classes: - is_direct - include_self - traverse_up - + in_subset: + - SpecificationSubset structured_alias: description: >- @@ -2150,7 +2421,6 @@ classes: - value: "https://w3id.org/mod#acronym" description: An acronym - expression: mixin: true abstract: true @@ -2194,7 +2464,6 @@ classes: range: path_expression all_of: range: path_expression - slot_expression: description: an expression that constrains the range of values a slot can take @@ -2244,7 +2513,8 @@ classes: status: testing slot_definition: - description: the definition of a property or a slot + rank: 3 + description: an element that describes how instances are related to other instances aliases: - slot - field @@ -2298,8 +2568,10 @@ classes: slot_usage: is_a: range: slot_definition + description: A primary parent slot from which inheritable metaslots are propagated mixins: range: slot_definition + description: A collection of secondary parent mixin slots from which inheritable metaslots are propagated apply_to: range: slot_definition disjoint_with: @@ -2307,7 +2579,10 @@ classes: union_of: range: type_definition in_subset: - - basic + - SpecificationSubset + - MinimalSubset + - BasicSubset + - OwlProfile class_expression: mixin: true @@ -2338,9 +2613,10 @@ classes: status: testing class_definition: + rank: 2 mixins: - class_expression - description: the definition of a class or interface + description: an element whose instances are complex objects that may have slot-value assignments aliases: - table - record @@ -2367,8 +2643,10 @@ classes: slot_usage: is_a: range: class_definition + description: A primary parent class from which inheritable metaslots are propagated mixins: range: class_definition + description: A collection of secondary parent mixin classes from which inheritable 
metaslots are propagated apply_to: range: class_definition rules: @@ -2380,10 +2658,12 @@ classes: close_mappings: - owl:Class in_subset: - - minimal - - basic - - relational_model - - object_oriented + - SpecificationSubset + - MinimalSubset + - BasicSubset + - RelationalModelProfile + - ObjectOrientedProfile + - OwlProfile class_level_rule: abstract: true @@ -2411,6 +2691,8 @@ classes: - sh:TripleRule - swrl:Imp status: testing + in_subset: + - SpecificationSubset pattern_expression: description: a regular expression pattern used to evaluate conformance of a string @@ -2440,14 +2722,18 @@ classes: slots: - setting_key - setting_value + in_subset: + - SpecificationSubset prefix: + rank: 12 description: prefix URI tuple slots: - prefix_prefix - prefix_reference in_subset: - - basic + - SpecificationSubset + - BasicSubset local_name: description: an attributed label @@ -2461,7 +2747,7 @@ classes: - value - value_description in_subset: - - basic + - BasicSubset alt_description: description: an attributed description @@ -2469,9 +2755,10 @@ classes: - alt_description_source - alt_description_text in_subset: - - basic + - BasicSubset permissible_value: + rank: 16 aliases: - PV description: a permissible value, accompanied by intended text and an optional mapping to a concept URI @@ -2490,9 +2777,11 @@ classes: - meaning - unit in_subset: - - basic + - SpecificationSubset + - BasicSubset unique_key: + rank: 20 description: a collection of slots whose values uniquely identify an instance of a class mixins: - extensible @@ -2502,14 +2791,16 @@ classes: - unique_key_name - unique_key_slots in_subset: - - basic - - relational_model + - SpecificationSubset + - BasicSubset + - RelationalModelProfile #================================== # Enumerations # #================================== enums: pv_formula_options: + rank: 1 description: >- The formula used to generate the set of permissible values from the code_set values permissible_values: @@ -2518,9 +2809,11 @@ enums: URI: 
The permissible values are the set of code URIs in the code set FHIR_CODING: The permissible values are the set of FHIR coding elements derived from the code set in_subset: - - basic + - SpecificationSubset + - BasicSubset presence_enum: + rank: 11 description: enumeration of conditions by which a slot value should be set permissible_values: UNCOMMITTED: @@ -2528,6 +2821,7 @@ enums: ABSENT: relational_role_enum: + rank: 3 description: enumeration of roles a slot on a relationship class can play permissible_values: SUBJECT: @@ -2551,6 +2845,7 @@ enums: description: a slot with this role connects a relationship to a node that is not subject/object/predicate alias_predicate_enum: + rank: 5 permissible_values: EXACT_SYNONYM: meaning: skos:exactMatch diff --git a/linkml_model/model/schema/types.yaml b/linkml_model/model/schema/types.yaml index c2376dfd..f1921023 100644 --- a/linkml_model/model/schema/types.yaml +++ b/linkml_model/model/schema/types.yaml @@ -63,7 +63,7 @@ types: repr: str description: A time object represents a (local) time of day, independent of any particular day notes: - - URI is dateTime because OWL reasoners don't work with straight date or time + - URI is dateTime because OWL reasoners do not work with straight date or time date: uri: xsd:date diff --git a/mkdocs.yml b/mkdocs.yml index 4d0b421c..27db40bf 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,8 +1,23 @@ site_name: "LinkML Model" theme: - name: readthedocs + name: material analytics: gtag: G-2SYBSJVZ23 + palette: + - media: '(prefers-color-scheme: light)' + scheme: default + primary: teal + accent: amber + toggle: + icon: material/lightbulb + name: Switch to light mode + - media: '(prefers-color-scheme: dark)' + scheme: slate + primary: teal + accent: amber + toggle: + icon: material/lightbulb-outline + name: Switch to dark mode plugins: - search - mermaid2 diff --git a/project.Makefile b/project.Makefile new file mode 100644 index 00000000..1bd639d1 --- /dev/null +++ b/project.Makefile @@ 
-0,0 +1,22 @@ + +# --------------------------------------- +# CUSTOM VALIDATION +# --------------------------------------- +EXAMPLES = relational-roles rules slot-group path unique-key inlining-union + +all-validate: $(patsubst %, validate-%, $(EXAMPLES)) +validate-%: examples/%-example.yaml + $(RUN) linkml-validate -C SchemaDefinition -s linkml_model/model/schema/meta.yaml $< + + +TITLE = "LinkML Specification" +SPEC = target/SPECIFICATION.md +target/0%.md: specification/0%.md + (cat $< && echo) > $@.tmp && mv $@.tmp $@ + +$(SPEC): $(wildcard specification/0*.md) + cat specification/0*.md > $@.tmp && mv $@.tmp $@ +SPECIFICATION.pdf: $(SPEC) + pandoc -T $(TITLE) --pdf-engine=xelatex --toc -s $< -o $@ +SPECIFICATION.html: $(SPEC) + pandoc --metadata pagetitle=$(TITLE) -f gfm --toc -s $< -o $@ diff --git a/pyproject.toml b/pyproject.toml index b0471b7f..071b7ce9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,19 @@ +[tool.poetry] +name = "linkml-project-template" +version = "0.1.0" +description = "Enter description of your project here" +authors = ["Mark A. Miller "] +license = "MIT" +include = ["README.md", "src/linkml", "project"] + +[tool.poetry.dependencies] +python = "^3.9" +linkml-runtime = "^1.3.0" + +[tool.poetry.dev-dependencies] +linkml = "^1.3.0" +mkdocs-material = "^8.2.8" + [build-system] -requires = ["setuptools", "wheel"] -build-backend = "setuptools.build_meta:__legacy__" \ No newline at end of file +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/specification/00preamble.md b/specification/00preamble.md index 93867e5b..ea347ba3 100644 --- a/specification/00preamble.md +++ b/specification/00preamble.md @@ -13,7 +13,8 @@ This document defines the structure of instance data using a functional-style sy ## Notes -More information about LinkML can be found on the [LinkML site](https://linkml.io), which includes introductory material and tutorials. 
It also includes a reference implementation and set of tools for working with LinkML schemas and data. +More information about LinkML can be found on the [LinkML site](https://linkml.io), which includes introductory material and [tutorials](https://linkml.io/linkml/intro/tutorial). +It also includes a reference implementation and set of tools for working with LinkML schemas and data. The specification provided here is intended to be independent of any particular tool or implementation. @@ -23,5 +24,5 @@ This is a draft specification open from comments to all. ### License -This specification, like all parts of LinkML in the public domain under a Creative Commons Zero license waiver. +This specification, like all parts of LinkML, is in the public domain under a Creative Commons Zero license waiver. diff --git a/specification/01introduction.md b/specification/01introduction.md index d8d6390d..a94ddd9b 100644 --- a/specification/01introduction.md +++ b/specification/01introduction.md @@ -2,18 +2,13 @@ This document is a functional draft specification for the Linked Data Modeling Language (LinkML). -LinkML is a data modeling language for describing the structure of a collection of *instances*, -where instances are tree-like object oriented structures. Each instance instantiates a class from the LinkML *metamodel*. This is either a *primitive* class such as a scalar type, reference, enumeration, or a *class* class, which is associated with slot-value *assignments*. +LinkML is a data modeling language for describing the structure of a collection of *instances*, where instances are tree-like object-oriented structures. Instances are pieces of information that represent things of interest in a particular domain, such as individual people, biological samples, places, events, or abstract entities. -LinkML schemas also specify *rules* for determining if instances conform to a the schema, and for adding additional implicit information to an instance collection.
+Instances are either primitive *types* such as numbers or strings, or objects that are typed using *classes* from a LinkML *schema*. Classes are categories or groupings of things in the domain of interest; for example, "Person", "Medical History", "Data file", or "Country". Instances can be inter-related by assigning *slot values*; for example, an instance of a Person may have values for slots "name" or "country of birth". -LinkML is independent of any programming language, and independent of -any concrete form for serializing instances of schemas. Mappings are -provided for serializing instances as JSON, YAML, RDF, flat tables, or -relational models, or for mapping to programming language structures, -but are independent of any of these. Schemas are typically expressed -using the YAML serialization, but this specification is independent of -that serialization. +LinkML schemas also specify *rules* for determining if instances conform to the schema, and for *inference* adding additional implicit slot values. + +LinkML is independent of any programming language, database technology, and is independent of any concrete form for serializing instances of schemas. Mappings are provided for serializing instances as JSON, YAML, RDF, flat tables, or relational models, or for mapping to programming language structures. However, the structure and semantics of LinkML are independent from any of these. Schemas are typically expressed using the YAML serialization, but this specification is defined independent of that particular serialization. LinkML is self-describing, and any LinkML schema is itself a collection instances that instantiates elements in a special schema called the *LinkML metamodel*. @@ -59,28 +54,27 @@ This section. Provides background information and preliminary definitions ### Part 2: Structure and Syntax of Instances -This specifies what an instance is in the context of LinkML. +Specification of the data model for instances in LinkML. 
-The instance data model is shown as UML. A normative functional-style syntax is provided for instances, and this syntax is used throughout the -specification. +The data model is shown as UML for informative purposes. A normative functional-style syntax is provided for instances, and this syntax is used throughout the specification. -This also introduces a path accessor syntax for specifying how to traverse LinkML instances. +This section also introduces a **path accessor** syntax for specifying how to traverse LinkML instances. ### Part 3: Structure of Schemas -This section specifies the core elements of a LinkML schema. +Specification of the core elements of a LinkML schema: ClassDefinitions, TypeDefinitions, SlotDefinitions, EnumDefinitions, as well as ancillary structures. ### Part 4: Derived Schemas and Schema Semantics -This section specifies rules for inferring derived schemas, which can be used for purposes such as validation. +Specification of inference procedures for **derived schemas**, which can be used for purposes such as validation. ### Part 5: Validation of Instance Data -This section specifies the procedure for validating LinkML instances using a derived schema +Specification of the procedure for **validating** LinkML instances using a derived schema ### Part 6: Mapping of Instance Data -This section specifies how LinkML instances are mapped to other data models and syntaxes, including: +Specification of how LinkML instances are mapped to other data models and syntaxes: - JSON/YAML -- RDF +- RDF and JSON-LD diff --git a/specification/02instances.md b/specification/02instances.md index f5d5e837..ff4bef40 100644 --- a/specification/02instances.md +++ b/specification/02instances.md @@ -2,48 +2,71 @@ ## Functional Instance Syntax and Structure -This specification provides a grammar for a functional syntax for +This specification provides a grammar for a **functional syntax** for expressing LinkML instances.
This syntax is not intended for data -exchange, but instead for defining the structure of LinkML -instances. Mappings are provided later for other serializations. +exchange, but instead for unambiguously describing data in LinkML. + +[Section 6](06mapping) specifies how the instance model is converted to JSON, YAML, and RDF. ### Instances -An instance is either one of four *definition* types, or a list of instances, or the special token `None`: +An instance is either one of four *definition* types, or a collection, or the special token `None`: -> **Instance** := **None** | **ClassDefinitionInstance** | **TypeDefinitionInstance** | **EnumDefinitionInstance** | **ClassDefinitionReferenceInstance** | **CollectionInstance** +> **Instance** := **None** | **InstanceOfClass** | **AtomicInstance** | **CollectionInstance** + +> **AtomicInstance** := **InstanceOfType** | **InstanceOfEnum** | **InstanceOfReference** ```mermaid classDiagram - Instance <|-- ClassDefinitionInstance + Instance <|-- InstanceOfClass Instance <|-- AtomicInstance - AtomicInstance <|-- ClassDefinitionReferenceInstance - AtomicInstance <|-- TypeDefinitionInstance - AtomicInstance <|-- EnumDefinitionInstance + AtomicInstance <|-- InstanceOfReference + AtomicInstance <|-- InstanceOfType + AtomicInstance <|-- InstanceOfEnum Instance <|-- CollectionInstance Instance <-- None ``` +### Definition Types and Names + +Definition names are used to unambiguously indicate definitions specified in a **Schema** (described in the [next section](03schemas)): + +> **ClassDefinitionName** := **ElementName** + +> **ClassDefinitionReferenceName** := **ElementName** +> **TypeDefinitionName** := **ElementName** -### instances of ClassDefinition +> **EnumDefinitionName** := **ElementName** -A **ClassDefinitionInstance** is a pair consisting of (1) a -ClassDefinition *Name* and (2) zero to many *Assignments**, where each -assignment is a pair of a slot (attribute) and an instance value.
+> **SlotDefinitionName** := **ElementName** -**ClassDefinitionInstance** := **ClassDefinitionName** '(' <**Assignment**>List ')' +> **ElementName** := *a finite sequence of characters matching the PN_LOCAL production of [SPARQL](https://www.w3.org/TR/rdf-sparql-query/) and not matching any of the keyword terminals of the syntax* -**Assignment** := **SlotName** '=' **Instance** +Note that the grammar needs a table mapping names to types in order to +unambiguously parse a serialization in functional syntax + +names must not be shared across definition types + + + +### Instances of Classes + +An **InstanceOfClass** is a pair consisting of (1) a ClassDefinition *Name* that indicates the *instantiation type* of the instance, and (2) zero to many *Assignments**, where each +assignment is a key-value pair of a **SlotName** and an **Instance** value. + +> **InstanceOfClass** := **ClassDefinitionName** '(' <**Assignment**>List ')' + +> **Assignment** := **SlotName** '=' **Instance** ```mermaid classDiagram - Instance <|-- ClassDefinitionInstance - ClassDefinitionInstance "0..*" *--> Assignment + Instance <|-- InstanceOfClass + InstanceOfClass "1" --> "1..*" Assignment Assignment "1" --> Instance - class ClassDefinitionInstance { + class InstanceOfClass { +ClassDefinitionName type +Assignment assignments } @@ -53,104 +76,85 @@ classDiagram } ``` -No SlotName can appear twice in any ClassDefinitionInstance (i.e. SlotName is a key) - -For example, given a class name **Person**, a ClassDefinition instance might look like: +No SlotName can appear twice in any set of Assignments (i.e. SlotName is a key) +An example instance might be written in functional syntax as: ```python Person(id=..., name=..., - ) + age=..., + ) ``` -The set of permitted slots for a class is defined by LinkML -*schema*. An instance can be syntactically valid (i.e conforming to the -grammar defined in this section) while structurally invalid (i.e not -conforming to a schema). 
+where this instantiates the class with name "Person". -### Primitive Instances +### Primitive (Atomic) Instances -There are 3 types of primitive instances, each is a pair consisting of (1) the definition name and (2) an atomic value +There are 3 types of primitive instances, each is a pair consisting of (1) a *Name* of the element instantiated (2) an atomic value -**TypeDefinitionInstance** := **TypeDefinitionName** '(' **AtomicValue** ')' +**InstanceOfType** := **TypeDefinitionName** '(' **AtomicValue** ')' -**EnumDefinitionInstance** := **EnumDefinitionName** '(' **AtomicValue** ')' +**InstanceOfEnum** := **EnumDefinitionName** '(' **AtomicValue** ')' -**ClassDefinitionReferenceInstance** := **ClassDefinitionReferenceName** '(' **AtomicValue** ')' +**InstanceOfReference** := **ClassDefinitionReferenceName** '(' **AtomicValue** ')' ```mermaid classDiagram Instance <|-- AtomicInstance - AtomicInstance <|-- ClassDefinitionReferenceInstance - AtomicInstance <|-- TypeDefinitionInstance - AtomicInstance <|-- EnumDefinitionInstance + AtomicInstance <|-- InstanceOfReference + AtomicInstance <|-- InstanceOfType + AtomicInstance <|-- InstanceOfEnum class Instance { } class AtomicInstance { +AtomicValue value } - class ClassDefinitionReferenceInstance { + class InstanceOfReference { +ClassDefinitionName type } - class TypeDefinitionInstance { + class InstanceOfType { +TypeDefinitionName type } - class EnumDefinitionInstance { + class InstanceOfEnum { +EnumDefinitionName type } ``` -For example, given a type name **PhoneNumber**, a TypeDefinition instance might look like: - -```python -PhoneNumber("+1 800 555 0100") -``` - -And given a ClassDefinitionReference **PersonId**, a ClassDefinitionReference instance might look like: - -```python -PersonId("SSN:456") -``` - -This MAY be the same string used to identify an instance of a ClassDefinition `Person(id="SSN:456")`, but this is not required. 
- -The semantics of these primitive instance types are defined by a *schema* ### Atomic Values An atomic value is either a string or number or boolean, where numbers can be floating points, decimals, or integers. -**AtomicValue** := **StringValue** | **NumberValue** | **BooleanValue** +> **AtomicValue** := **StringValue** | **NumberValue** | **BooleanValue** -**StringValue** := *a finite sequence of characters in which " (U+22) and \ (U+5C) occur only in pairs of the form \" (U+5C, U+22) and \\ (U+5C, U+5C), enclosed in a pair of " (U+22) characters* +> **StringValue** := *a finite sequence of characters in which " (U+22) and \ (U+5C) occur only in pairs of the form \" (U+5C, U+22) and \\ (U+5C, U+5C), enclosed in a pair of " (U+22) characters* -**NumberValue** := **FloatingPointValue** | **DecimalValue** | **IntegerValue** +> **NumberValue** := **FloatingPointValue** | **DecimalValue** | **IntegerValue** -**FloatingPointValue** ::= [ '+' | '-'] ( **Digits** ['.'**Digits**] [ **Exponent** ] | '.' **Digits** [ **Exponent** ]) ( 'f' | 'F' ) +> **FloatingPointValue** ::= [ '+' | '-'] ( **Digits** ['.'**Digits**] [ **Exponent** ] | '.' **Digits** [ **Exponent** ]) ( 'f' | 'F' ) -**Exponent** ::= ('e' | 'E') ['+' | '-'] **Digits** +> **Exponent** ::= ('e' | 'E') ['+' | '-'] **Digits** -**DecimalValue** ::= ['+' | '-'] **Digits** '.' **Digits** +> **DecimalValue** ::= ['+' | '-'] **Digits** '.' 
**Digits** -**IntegerValue** ::= ['+' | '-'] **Digits** +> **IntegerValue** ::= ['+' | '-'] **Digits** +> **NonNegativeInteger** ::= **Zero** | **PositiveInteger** -**NonNegativeInteger** ::= **Zero** | **PositiveInteger** +> **PositiveInteger** ::= **NonZero** { **Digit** } -**PositiveInteger** ::= **NonZero** { **Digit** } +> **Digits** ::= **Digit** { **Digit** } -**Digits** ::= **Digit** { **Digit** } +> **Digit** ::= **Zero** | **NonZero** -**Digit** ::= **Zero** | **NonZero** +> **NonZero** := '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' -**NonZero** := '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' +> **Zero** ::= '0' -**Zero** ::= '0' - -**BooleanValue** ::= 'True' | 'False' +> **BooleanValue** ::= 'True' | 'False' Examples of atomic values are: @@ -158,32 +162,59 @@ Examples of atomic values are: * `5` -- an IntegerValue * `"Alex"` -- a StringValue +#### Atomic Instance Examples + +An InstanceOfType instance might look like: + +```python +Integer(23) +``` + +For this to be a valid InstanceOfType, "Integer" must be the name of a TypeDefinition in the schema + +Another example: + +```python +PhoneNumber("+1 800 555 0100") +``` + +For this to be a valid InstanceOfType, "PhoneNumber" must be the name of a TypeDefinition in the schema + +If the schema includes a ClassReference "PersonId" then the following is a valid InstanceOfReference + +```python +PersonId("SSN:456") +``` + +This MAY be the same string used to identify an instance of a ClassDefinition `Person(id="SSN:456")`, but this is not required. 
+ ### Collections A collection is zero or more instances, serialized as a comma-delimited list: -**CollectionInstance** := '[' <**Instance**>List ']' +> **CollectionInstance** := '[' <**Instance**>List ']' ```mermaid classDiagram Instance <|-- CollectionInstance -CollectionInstance "0..*" --> Instance +CollectionInstance "*" --> "0..*" Instance ``` -Examples of lists: +Examples of collections: * `[String("A"), String("B"), Integer(5)]` -- a list of primitive instances -* `[Person(name=...), Person(name=...)]` -- a list of class instances -* `[]` -- an empty list +* `[Person(name=..., ...), Person(name=..., ...)]` -- a list of class instances +* `[Person(name=..., ...), Integer(5), None]` -- a heterogeneous collection +* `[]` -- an empty collection ### None (Null) instances A special symbol is included to indicate null or missing values -**None** := 'None' +> **None** := 'None' -An assignment of a slot to None is equivalent to omitting that assigment. I.e. the following two instance serializations are equivalent: +An assignment of a slot to None is equivalent to omitting that assignment. I.e. 
the following two instance serializations are equivalent: ``` Person(address=None) @@ -193,30 +224,11 @@ Person(address=None) Person() ``` -### Definition Names - -Definition names are used to unambiguously indicate definitions specified in a schema - -**ClassDefinitionName** := **Name** -**ClassDefinitionReferenceName** := **Name** -**TypeDefinitionName** := **Name** +### Combined Example -**EnumDefinitionName** := **Name** - -**SlotDefinitionName** := **Name** - -**Name** := *a finite sequence of characters matching the PN_LOCAL production of [SPARQL] and not matching any of the keyword terminals of the syntax* - -Note that the grammar needs a table mapping names to types in order to -unambiguously parse a serialization in functional syntax - -names must not be shared across definition types - -### Example - -The following is an example of an instance of a ClassDefinition called **Person**: +The following is an example of an **InstanceOfClass** where the instantiated type is a class with name "Person": ```python Person( @@ -237,11 +249,9 @@ Person( ) ``` -See (YAML-Mapping)[#YAML-Mapping] for an example of the equivalent structure expressed in YAML. - -Parsing this requires a table that maps definition names to schema element types: +Parsing this requires a schema that assigns element names to schema elements: -|Definition Name|Type| +|Element Name|Schema Type| |---|---| |Person|ClassDefinition| |Measurement|ClassDefinition| @@ -253,14 +263,18 @@ Parsing this requires a table that maps definition names to schema element types For two instances `i` and `j` to be identical they must be of the same metatype. 
-Identity conditions for two primitive instances are satisfied if both DefinitionName and AtomicValue match +Identity conditions for two primitive instances are satisfied if both **DefinitionName** and **AtomicValue** match + +* `i=<DefinitionName_i, AtomicValue_i>` +* `j=<DefinitionName_j, AtomicValue_j>` +* `i=j iff DefinitionName_i = DefinitionName_j and AtomicValue_i=AtomicValue_j` **None** is identical to itself -Two collections are identical if they are of the same length and each member of `i` is identical to at least one member of `j` +Two collections are identical if they are of the same length and each member of `i` is identical to at least one member of `j`, where each match must be unique -Two ClassDefinition instances are identical if the instantiated ClassDefinitionName is identical and each non-None assignment -in *i* is identical an assignment in `j` and, and each non-None assignment in `j` is identical to an assignment in `i` +Two ClassDefinition instances are identical if the instantiated **ClassDefinitionName** is identical and each non-**None** assignment +in `i` is identical to an assignment in `j`, and each non-**None** assignment in `j` is identical to an assignment in `i` Two assignments are identical if the slot name is the same, and the value is identical @@ -269,21 +283,21 @@ Two assignments are identical if the slot name is the same, and the value is ide The abstract syntax provided here is intended as a normative syntax for the purposes of specifying the semantics of LinkML. -See future sections for details on how this functional syntax maps to other serialization +See [section 6](06mapping.md) for details on how this functional syntax maps to other serialization syntaxes and models such as JSON and RDF. ## Instance Accessor Syntax For a given instance `i`, *accessor* syntax can be used to dereference values. -**Path** := **Source** { **PathExtension** } +> **Path** := **SlotDefinitionName** { **PathExtension** } -**PathExtension** := '.'
**SlotDefinitionName** | '[' **Identifier** ']' +> **PathExtension** := '.' **SlotDefinitionName** | '[' **Identifier** ']' To interpret an accessor for a given instance *i*: -- if the path extension is `.` then *i* must be a ClassDefinition instance, and the value is equal to the value of the slot assignment for slot `s` -- if the path extension is `[]` then *i* must be a Collection instance, and the value is equals to the member of that list that has a slot with the role of *identifier* whose value is `` +- if the path extension is `.<s>` then *i* must be an **InstanceOfClass**, and the value is equal to the value of the slot assignment for slot `s` +- if the path extension is `[<id>]` then *i* must be a **CollectionInstance**, and the value is equal to the member of that list that has a slot with the role of *identifier* whose value is `<id>` For example, if *i* is equal to the Person instance in the example above: diff --git a/specification/03schemas.md b/specification/03schemas.md index 8e9d9b8f..098bb46b 100644 --- a/specification/03schemas.md +++ b/specification/03schemas.md @@ -1,152 +1,35 @@ -# LinkML Schemas +# Schema Datamodel -This section describes the *structure* of a LinkML schema. +This section describes the *structure* of a LinkML schema. For precise interpretation of these structures, refer to the following two sections on *derived schemas* and *validation* ## Schema Basics A LinkML **schema** specifies rules and structural conformance conditions for *instances*.
Schemas allow for: - parsing of instance **serializations** to LinkML instance structures -- structurally and semantically validating LinkML instance structures -- inferring missing values in LinkML instance structures +- structurally and semantically **validating** LinkML instance structures +- **inference** of missing values in LinkML instance structures -Every LinkML schema `m` is itself an instance of a special class `SchemaDefinition` that forms part of a special schema called the *LinkML metamodel* (abbreviated `MM`). -There is exactly one metamodel. +Every LinkML schema *m* is itself an instance of a special class [SchemaDefinition](https://w3id.org/linkml/SchemaDefinition) that forms part of a special schema called the **LinkML metamodel**, which is denoted as *mM*. There is exactly one metamodel. In this specification: - - Classes in `MM` are called Metaclasses - - Slots in `MM` are called Metaslots + - ClassDefinitions in *mM* are called **metaclasses** + - SlotDefinitions in *mM* are called **metaslots** -The metamodel is itself expressed in LinkML, and the latest version can be browsed online +The metamodel is itself expressed in LinkML, and the latest version can be found at canonical URLs: -* [SchemaDefinition](https://w3id.org/linkml/SchemaDefinition) +* [https://w3id.org/linkml/SchemaDefinition](https://w3id.org/linkml/SchemaDefinition) -- generated documentation +* [https://w3id.org/linkml/meta.yaml](https://w3id.org/linkml/meta.yaml) -- canonical YAML serialization -A schema is canonically represented in YAML. This representation is the YAML serialization of a SchemaDefinition instance following the rules for YAML -serialization in the mapping section. +This specification specifies the *normative elements* necessary to specify the behavior of LinkML schemas. Schemas may have additional +elements provided in the metamodel.
For example, elements in schemas can have *informative* slot assignments for slots such as [title](https://w3id.org/linkml/title), [description](https://w3id.org/linkml/description), and so on, but these slots are not described in this specification as they are not normative and do not affect the formal interpretation of schemas. -This specification specifies the *minimal elements* necessary to specify the behavior of LinkML schemas. Schemas may have additional -elements provided in the metamodel. +### YAML representation of schemas -Formally a schema `m` consists at least of the following *elements*: +Because schemas and all the parts of a schema are instances of metaclasses in the metamodel, the YAML serialization rules from [section 6](06mapping) can be used to both serialize and deserialize a schema as YAML. - * class definitions `MC = {c1,...}`, which group *instances* - * slot definitions `MS = {s1, ...}`, which describe how instances relate to other instances - * enum definitions `ME = {e1, ...}`, enumerated values (value sets) - * Enum definitions `MT = {t1, ...}`, scalar/atomic Enums, such as integers, strings - * subset definitions `MP = {ss1, ..}`, which partition model elements into groupings or views - * URI prefixes `MU = {prefix1, ..}`, which partition model elements into groupings or views - * imports `MI = {imp1, ..}`, which reference schemas that are reused - * class definition references `MR = {r1,...}`, which group *pointers to instances* - * model-level metadata - -These are stored in the SchemaDefinition instance as follows: - -|Path|Element| -|---|---| -| `m.classes` | `MC` | -| `m.slots` | `MS` | -| `m.enums` | `ME` | -| `m.Enums` | `MT` | -| `m.subsets` | `MP` | -| `m.imports` | `MI` | -| `m.prefixes` | `MU` | -| `m.` | model-level metadata | - -the high level schema can be depicted as: - -```mermaid -classDiagram - SchemaDefinition "*"-->ClassDefinition: classes - SchemaDefinition "*"-->SlotDefinition: slots - SchemaDefinition
"*"-->EnumDefinition: enums - SchemaDefinition "*"-->EnumDefinition: Enums - SchemaDefinition "*"-->SubsetDefinition: subsets - SchemaDefinition "*"-->Prefix: prefixes - ClassDefinition --|> Element - SlotDefinition --|> Element - EnumDefinition --|> Element - EnumDefinition --|> Element - class SchemaDefinition { - +Uri id - +NcName name - +UriOrCurie[] imports - +Ncname default_prefix - +Uri default_range - } -``` - -The metamodel makes use of the standard *types* library, see below - -For example, consider a schema that models representations of individual people and organizations they belong to may include a class definition `Person`, and slot definitions for `name`, `address`, `relationships` and so on. - -Because schemas, are instances of the metamodel, this hypothetical schema may be serialized in functional instance syntax as follows: - -``` -SchemaDefinition( - id=String('http://example.org/organization'), - classes=[ - ClassDefinition( - name=String("Person"), - slots=[ - String("id"), - String("name"), - String("height"), - String("age"), - ... - ] - ), - ClassDefinition( - name=String("Organization"), - slots=[ - String("id"), - ... - ] - ), - ... - ], - slots=[ - SlotDefinition( - name=String("id"), - identifier=True, - description=String("..."), - range=String("String"), - ... - ), - SlotDefinition( - name=String("name"), - description=String("..."), - range=String("String"), - ... - ) - ], - enums=[ - EnumDefinition( - name=String("JobCode"), - permissible_values=[...], - ) - ], - Enums=[ - EnumDefinition( - name=String("Date"), - ... - ), - EnumDefinition( - name=String("String"), - ... 
- ), - ] -) -``` - -This maps to: - -|Example Model: Organization Schema| -|---| -|`MC` Classes: *Person*, *Organization*, *Address*, ...| -|`MS` Slots: *id*, *name*, *date_of_birth*, *employed_at*, *lives_at*, ...| -|`ME` Enums: *JobCode*, ...| -|`MT` Enums: *Date*, *String*, ...| +### Analogies to other modeling frameworks To help understand the basic concepts, it can be helpful to think about analogous structures in other frameworks. However, it should be understood these are not equivalents. @@ -161,63 +44,156 @@ However, it should be understood these are not equivalents. - properties in JSON-Schema - rdf:Property entities in RDFS/OWL * EnumDefinitions are analogous to: - - [enumerated Enums](https://en.wikipedia.org/wiki/Enumerated_Enum) in programming languages and some relational systems + - [enumerated types](https://en.wikipedia.org/wiki/Enumerated_type) in programming languages and some relational systems + - drop-down selections in spreadsheets - Note however that in LinkML enums are optionally backed by stronger semantics with enum elements (permissible values) mapped to vocabularies or ontologies * EnumDefinitions are analogous to: - - [data Enums](https://en.wikipedia.org/wiki/Data_Enum) in most OO languages - - primitive Enums in database systems - - extensible Enums in some systems + - [data types](https://en.wikipedia.org/wiki/Data_type) in most object-oriented languages + - primitive types in database systems + - extensible types in some systems - rdf:Literals in RDF - - DataEnums in OWL + - Datatypes in OWL -## Elements and Expressions +## SchemaDefinition Metaclass -For each of the 4 core element Enums ``, there are a triad of 3 classes in the metamodel: +* metamodel documentation: [SchemaDefinition](https://w3id.org/linkml/SchemaDefinition) -* `Expression` - * `AnonymousExpression` - * `Definition` +### SchemaDefinition: Normative subset metaslots + +A schema *m* is an instance of a SchemaDefinition, with normative elements: + +| Name | 
Cardinality and Range | Description | +| --- | --- | --- | +| [id](id.md) | 1..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | The official schema URI | +| [name](name.md) | 1..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | a unique name for the schema that is both human-readable and consists of only... **identifier** | +| [classes](classes.md) | 0..*
[ClassDefinition](ClassDefinition.md) | An index to the collection of all class definitions in the schema | +| [slot_definitions](slot_definitions.md) | 0..*
[SlotDefinition](SlotDefinition.md) | An index to the collection of all slot definitions in the schema | +| [enums](enums.md) | 0..*
[EnumDefinition](EnumDefinition.md) | An index to the collection of all enum definitions in the schema | +| [subsets](subsets.md) | 0..*
[SubsetDefinition](SubsetDefinition.md) | An index to the collection of all subset definitions in the schema | +| [prefixes](prefixes.md) | 0..*
[Prefix](Prefix.md) | prefix / URI definitions to be added to the context beyond those fetched from... | +| [default_prefix](default_prefix.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | default and base prefix -- used for ':' identifiers, @base and @vocab | +| [default_range](default_range.md) | 0..1
[TypeDefinition](TypeDefinition.md) | default slot range to be used if range element is omitted from a slot definit... | +| [settings](settings.md) | 0..*
[Setting](Setting.md) | A collection of global variable settings | +| [imports](imports.md) | 0..*
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | other schemas that are included in this schema | +| [rank](rank.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | the relative order in which the element occurs, lower values are given preced... | +| [id_prefixes](id_prefixes.md) | 0..*
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the identifier of this class or slot must begin with the URIs referenced by t... | +| [from_schema](from_schema.md) | 0..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | id of the schema that defined the element | -depicted as: + +### SchemaDefinition: Normative subset UML + +A subset of the above normative slots are depicted as follows: ```mermaid classDiagram -T_Expression "*" --> AnonymousT_Expression: -class T_Definition { - +T_Name name -} -T_Definition --|> T_Expression -AnonymousT_Expression --|> T_Expression + SchemaDefinition "1" --> "*" ClassDefinition: classes + SchemaDefinition "1" --> "*" SlotDefinition: slots + SchemaDefinition "1" --> "*" EnumDefinition: enums + SchemaDefinition "1" --> "*" TypeDefinition: types + SchemaDefinition "1" --> "*" SubsetDefinition: subsets + SchemaDefinition "1" --> "*" Prefix: prefixes + ClassDefinition --|> Element + SlotDefinition --|> Element + EnumDefinition --|> Element + TypeDefinition --|> Element + class SchemaDefinition { + +Uri id + +NcName name + +UriOrCurie[] imports + +Ncname default_prefix + +Uri default_range + } ``` -For many purposes, all that is required is the Definition element. The purpose of the above abstraction is to allow -composition of anonymous expressions using boolean operators. For example, we may want to refer to the union of collection -of ClassDefinitions. +### Schema example, functional syntax -## Metaclasses +The skeleton of a schema instance serialized in functional syntax might look like: -* metamodel documentation: [Element](https://w3id.org/linkml/Element) +``` +SchemaDefinition( + id=String('http://example.org/organization'), + imports=[...], + prefixes=[...], + classes=[...], + slots=[...], + enums=[...], + types=[...], +) +``` + +### Schema example, YAML + +The skeleton of the above schema instance serialized as YAML might look like: + +```yaml +id: http://example.org/organization +imports: + ... +prefixes: + ... +classes: + ... +slots: + ... +enums: + ... +types: + ... +``` -The following describes the structure of schema elements. 
Each element of a schema instantiates a *Metaclass* -### ClassDefinition +## ClassDefinition Metaclass * metamodel documentation: [ClassDefinition](https://w3id.org/linkml/ClassDefinition) -Instances of **ClassDefinition** are *instantiable*. +Instances of **ClassDefinition** are themselves *instantiable*. For example, a schema may contain a class definition "Person". This class definition instantiates the metaclass **ClassDefinition**, and can have instances, of actual persons. Any LinkML instance that instantiates a ClassDefinition will have zero to many slot-value assignments, constrained by rules that operate off of the metaslot assignments of that class. -ClassDefinition inherits from both Element and ClassExpression. A ClassDefinition must have a *name*. -ClassDefinitions may refer to **AnonymousClassExpression**s as part of boolean expressions. +### ClassDefinition: Normative subset metaslots + +Any instance *c* of a ClassDefinition may have assignments in any of the following normative metaslots + +| Name | Cardinality and Range | Description | +| --- | --- | --- | +| [name](name.md) | 1..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the unique name of the element within the context of the schema **identifier** | +| [class_uri](class_uri.md) | 0..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | URI of the class that provides a semantic interpretation of the element in a ... | +| [is_a](is_a.md) | 0..1
[ClassDefinition](ClassDefinition.md) | A primary parent class from which inheritable metaslots are propagated | +| [mixins](mixins.md) | 0..*
[ClassDefinition](ClassDefinition.md) | A collection of secondary parent mixin classes from which inheritable metaslo... | +| [slots](slots.md) | 0..*
[SlotDefinition](SlotDefinition.md) | collection of slot names that are applicable to a class | +| [slot_usage](slot_usage.md) | 0..*
[SlotDefinition](SlotDefinition.md) | the refinement of a slot in the context of the containing class definition | +| [attributes](attributes.md) | 0..*
[SlotDefinition](SlotDefinition.md) | Inline definition of slots | +| [tree_root](tree_root.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | indicator that this is the root class in tree structures | +| [rank](rank.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | the relative order in which the element occurs, lower values are given preced... | +| [any_of](any_of.md) | 0..*
[AnonymousClassExpression](AnonymousClassExpression.md) | holds if at least one of the expressions hold | +| [exactly_one_of](exactly_one_of.md) | 0..*
[AnonymousClassExpression](AnonymousClassExpression.md) | holds if only one of the expressions hold | +| [none_of](none_of.md) | 0..*
[AnonymousClassExpression](AnonymousClassExpression.md) | holds if none of the expressions hold | +| [all_of](all_of.md) | 0..*
[AnonymousClassExpression](AnonymousClassExpression.md) | holds if all of the expressions hold | +| [union_of](union_of.md) | 0..*
[ClassDefinition](ClassDefinition.md) | indicates that the domain element consists exactly of the members of the elem... | +| [unique_keys](unique_keys.md) | 0..*
[UniqueKey](UniqueKey.md) | A collection of unique keys for this class | +| [rules](rules.md) | 0..*
[ClassRule](ClassRule.md) | the collection of rules that apply to all members of this class | +| [classification_rules](classification_rules.md) | 0..*
[AnonymousClassExpression](AnonymousClassExpression.md) | the collection of classification rules that apply to all members of this clas... | +| [disjoint_with](disjoint_with.md) | 0..*
[ClassDefinition](ClassDefinition.md) | Two classes are disjoint if they have no instances in common, two slots are d... | +| [slot_conditions](slot_conditions.md) | 0..*
[SlotDefinition](SlotDefinition.md) | expresses constraints on a group of slots for a class expression | +| [abstract](abstract.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | Indicates the class or slot cannot be directly instantiated and is intended f... | +| [mixin](mixin.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | Indicates the class or slot is not intended to be inherited from without being a... | +| [string_serialization](string_serialization.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | Used on a slot that stores the string serialization of the containing object | +| [id_prefixes](id_prefixes.md) | 0..*
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the identifier of this class or slot must begin with the URIs referenced by t... | +| [from_schema](from_schema.md) | 0..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | id of the schema that defined the element | + + + +### ClassDefinition: UML + +A subset of the above normative slots are depicted as follows: ```mermaid classDiagram -ClassExpression "*" --> AnonymousClassExpression: any_of -ClassExpression "*" --> AnonymousClassExpression: exactly_one_of -ClassExpression "*" --> AnonymousClassExpression: none_of -ClassExpression "*" --> AnonymousClassExpression: all_of +ClassExpression "1" --> "*" AnonymousClassExpression: any_of +ClassExpression "1" --> "*" AnonymousClassExpression: exactly_one_of +ClassExpression "1" --> "*" AnonymousClassExpression: none_of +ClassExpression "1" --> "*" AnonymousClassExpression: all_of ClassDefinition --|> ClassExpression: mixin ClassDefinition --|> Element: is_a AnonymousClassExpression --|> ClassExpression @@ -226,29 +202,47 @@ class ClassDefinition { +boolean abstract +boolean mixin } -ClassDefinition "*"--> SlotDefinition: slots -ClassDefinition "*"--> SlotDefinition: slot_usage -ClassDefinition "*"--> AttributeDefinition: slot_usage -ClassDefinition "0..1"--> ClassDefinition: is_a -ClassDefinition "*"--> ClassDefinition: mixins -ClassDefinition "*"--> AnonymousClassExpression: classification_rules -ClassDefinition "*"--> ClassRule: rules +ClassDefinition "*" --> "*" SlotDefinition: slots +ClassDefinition "1" --> "*" SlotDefinition: slot_usage +ClassDefinition "1" --> "*" SlotDefinition: attributes +ClassDefinition "*"--> "0..1" ClassDefinition: is_a +ClassDefinition "1" --> "*" ClassDefinition: mixins +ClassDefinition "1" --> "*" AnonymousClassExpression: classification_rules +ClassDefinition "1" --> "*" ClassRule: rules ``` -* `name` is a required field that uniquely identifiers the **ClassDefinition** within a schema -* `slots` is a list of slot names indicating the slots that are applicable for that class -* `slot_usage` is a list of slots that specifies more specific usage conditions for the slot in the context of this class -* 
`attributes` is a list of slots that are intended to be local to the class -* `is_a` is an optional parent class which this class inherits from -* `mixins` is a list of parent mixin class which this class inherits from -* `abstract` is a boolean indicating whether the class is intended to be directly instantiated -* `mixin` is a boolean indicating whether the class is intended to be uses as a mixin +### ClassExpressions and anonymous ClassExpressions + +Note this metaclass exemplifies a pattern that is reused by TypeDefinition, SlotDefinition and EnumDefinition metaclasses, below. + +For the core definition types `D`, there is a triad of 3 metaclasses in the metamodel: + +* `D_Expression` + * `AnonymousD_Expression` + * `D_Definition` + +depicted as: + +```mermaid +classDiagram +D_Expression "*" --> AnonymousD_Expression: +class D_Definition { + +D_Name name +} +D_Definition --|> D_Expression +AnonymousD_Expression --|> D_Expression +``` + +For many purposes, all that is required is the Definition element. The purpose of the above abstraction is to allow +composition of anonymous expressions using boolean operators. For example, we may want to refer to the union of a collection +of ClassDefinitions. + +### Class Definition Example, Functional Syntax -An example collection of ClassDefinitions in a schema specified using the functional syntax might be: +A collection of ClassDefinition instances might look like: ```python -SchemaDefinition( - classes=[ +[ ClassDefinition( name=String("NamedThing"), abstract=True, @@ -269,40 +263,117 @@ SchemaDefinition( SlotDefinition( name=String("age"), ...) - ], - ... - ) + ), + ... +], ``` -This might be used to validate an instance: - -```python -Person(id=String("P:123"), - name=String("Alex"), - height=... - age=...) 
+### Class Definition Example, YAML + +The above example following the YAML serialization is: + +```yaml +NamedThing: + abstract: true + slots: + - id + - name +Person: + description: A person, living or dead + is_a: NamedThing + attributes: + height: + ... + age: + ... ``` -See the next sections for validation rules - -### SlotDefinition +## SlotDefinition Metaclass * metamodel documentation: [SlotDefinition](https://w3id.org/linkml/SlotDefinition) -**SlotDefinition** is NOT *instantiable*. Each assignment in a LinkML **ClassDefinition** instance must use a SlotDefinition from the schema. +Instances of **SlotDefinition** are not themselves *instantiable*. Each **assignment** in a LinkML **ClassDefinition** instance must use a SlotDefinition from the schema. + +### SlotDefinition: Normative subset metaslots SlotDefinition inherits from both Element and SlotExpression. A SlotDefinition must have a *name*. other schema elements may refer to **AnonymousSlotExpression**s composed using boolean operators. +Any instance *s* of a SlotDefinition may have assignments in any of the following normative metaslots + +| Name | Cardinality and Range | Description | +| --- | --- | --- | +| [name](name.md) | 1..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the unique name of the element within the context of the schema **identifier** | +| [slot_uri](slot_uri.md) | 0..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | predicate of this slot for semantic web application | +| [identifier](identifier.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | True means that the key slot(s) uniquely identify the container | +| [alias](alias.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the name used for a slot in the context of its owning class | +| [multivalued](multivalued.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | true means that slot can have more than one value | +| [required](required.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | true means that the slot must be present in the loaded definition | +| [recommended](recommended.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | true means that the slot should be present in the loaded definition, but this... | +| [is_a](is_a.md) | 0..1
[SlotDefinition](SlotDefinition.md) | A primary parent slot from which inheritable metaslots are propagated | +| [mixins](mixins.md) | 0..*
[SlotDefinition](SlotDefinition.md) | A collection of secondary parent mixin slots from which inheritable metaslots... | +| [inlined](inlined.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | True means that keyed or identified slot appears in an outer structure by val... | +| [inlined_as_list](inlined_as_list.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | True means that an inlined slot is represented as a list of range instances | +| [pattern](pattern.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the string value of the slot must conform to this regular expression expresse... | +| [rank](rank.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | the relative order in which the element occurs, lower values are given preced... | +| [any_of](any_of.md) | 0..*
[AnonymousSlotExpression](AnonymousSlotExpression.md) | holds if at least one of the expressions hold | +| [exactly_one_of](exactly_one_of.md) | 0..*
[AnonymousSlotExpression](AnonymousSlotExpression.md) | holds if only one of the expressions hold | +| [none_of](none_of.md) | 0..*
[AnonymousSlotExpression](AnonymousSlotExpression.md) | holds if none of the expressions hold | +| [all_of](all_of.md) | 0..*
[AnonymousSlotExpression](AnonymousSlotExpression.md) | holds if all of the expressions hold | +| [domain](domain.md) | 0..1
[ClassDefinition](ClassDefinition.md) | defines the type of the subject of the slot | +| [inherited](inherited.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | true means that the *value* of a slot is inherited by subclasses | +| [ifabsent](ifabsent.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | function that provides a default value for the slot | +| [list_elements_unique](list_elements_unique.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | If True, then there must be no duplicates in the elements of a multivalued sl... | +| [list_elements_ordered](list_elements_ordered.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | If True, then the order of elements of a multivalued slot is guaranteed to be... | +| [shared](shared.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | If True, then the relationship between the slot domain and range is many to o... | +| [key](key.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | True means that the key slot(s) uniquely identify the container | +| [designates_type](designates_type.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | True means that the key slot(s) is used to determine the instantiation (types... | +| [symmetric](symmetric.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | If s is symmetric, and i | +| [reflexive](reflexive.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | If s is reflexive, then i | +| [locally_reflexive](locally_reflexive.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | If s is locally_reflexive, then i | +| [irreflexive](irreflexive.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | If s is irreflexive, then there exists no i such i | +| [asymmetric](asymmetric.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | If s is asymmetric, and i | +| [transitive](transitive.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | If s is transitive, and i | +| [inverse](inverse.md) | 0..1
[SlotDefinition](SlotDefinition.md) | indicates that any instance of d s r implies that there is also an instance o... | +| [transitive_form_of](transitive_form_of.md) | 0..1
[SlotDefinition](SlotDefinition.md) | If s transitive_form_of d, then (1) s holds whenever d holds (2) s is transit... | +| [reflexive_transitive_form_of](reflexive_transitive_form_of.md) | 0..1
[SlotDefinition](SlotDefinition.md) | transitive_form_of including the reflexive case | +| [slot_group](slot_group.md) | 0..1
[SlotDefinition](SlotDefinition.md) | allows for grouping of related slots into a grouping slot that serves the rol... | +| [is_grouping_slot](is_grouping_slot.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | true if this slot is a grouping slot | +| [disjoint_with](disjoint_with.md) | 0..*
[SlotDefinition](SlotDefinition.md) | Two classes are disjoint if they have no instances in common, two slots are d... | +| [union_of](union_of.md) | 0..*
[TypeDefinition](TypeDefinition.md) | indicates that the domain element consists exactly of the members of the elem... | +| [range](range.md) | 0..1
[Element](Element.md) | defines the type of the object of the slot | +| [range_expression](range_expression.md) | 0..1
[AnonymousClassExpression](AnonymousClassExpression.md) | A range that is described as a boolean expression combining existing ranges | +| [enum_range](enum_range.md) | 0..1
[EnumExpression](EnumExpression.md) | An inlined enumeration | +| [minimum_value](minimum_value.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | for slots with ranges of type number, the value must be equal to or higher th... | +| [maximum_value](maximum_value.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | for slots with ranges of type number, the value must be equal to or lower than... | +| [structured_pattern](structured_pattern.md) | 0..1
[PatternExpression](PatternExpression.md) | the string value of the slot must conform to the regular expression in the pa... | +| [implicit_prefix](implicit_prefix.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | Causes the slot value to be interpreted as a uriorcurie after prefixing with ... | +| [equals_string](equals_string.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the slot must have range string and the value of the slot must equal the spec... | +| [equals_string_in](equals_string_in.md) | 0..*
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the slot must have range string and the value of the slot must equal one of t... | +| [equals_expression](equals_expression.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the value of the slot must equal the value of the evaluated expression | +| [minimum_cardinality](minimum_cardinality.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | the minimum number of entries for a multivalued slot | +| [maximum_cardinality](maximum_cardinality.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | the maximum number of entries for a multivalued slot | +| [has_member](has_member.md) | 0..1
[AnonymousSlotExpression](AnonymousSlotExpression.md) | the value of the slot is multivalued with at least one member satisfying the... | +| [all_members](all_members.md) | 0..*
[SlotDefinition](SlotDefinition.md) | the value of the multivalued slot is a list where all elements conform to the... | +| [abstract](abstract.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | Indicates the class or slot cannot be directly instantiated and is intended f... | +| [mixin](mixin.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | Indicates the class or slot is not intended to be inherited from without being a... | +| [string_serialization](string_serialization.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | Used on a slot that stores the string serialization of the containing object | +| [id_prefixes](id_prefixes.md) | 0..*
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the identifier of this class or slot must begin with the URIs referenced by t... | +| [from_schema](from_schema.md) | 0..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | id of the schema that defined the element | + +### SlotDefinition: Normative subset UML + +A subset of the above normative slots are depicted as follows: + ```mermaid classDiagram class SlotExpression { +SlotDefinitionName range } -SlotExpression "*" --> AnonymousSlotExpression: any_of -SlotExpression "*" --> AnonymousSlotExpression: exactly_one_of -SlotExpression "*" --> AnonymousSlotExpression: none_of -SlotExpression "*" --> AnonymousSlotExpression: all_of +SlotExpression "1" --> "*" AnonymousSlotExpression: any_of +SlotExpression "1" --> "*" AnonymousSlotExpression: exactly_one_of +SlotExpression "1" --> "*" AnonymousSlotExpression: none_of +SlotExpression "1" --> "*" AnonymousSlotExpression: all_of class SlotDefinition { +SlotDefinitionName name +boolean identifier @@ -334,6 +405,8 @@ SlotDefinition "0..1" --> SlotDefinition: is_a SlotDefinition "*" --> SlotDefinition: mixins ``` +### Slot Definition Example, Functional Syntax + An example collection of SlotDefinitions might be: ```python @@ -341,7 +414,7 @@ SchemaDefinition( slots=[ SlotDefinition( name=String("id"), - identifier=True, + identifier=Boolean(True), description=String("A unique identifier for an object"), range=String("String"), ... @@ -354,11 +427,43 @@ SchemaDefinition( ) ``` -### EnumDefinition +## EnumDefinition Metaclass * metamodel documentation: [EnumDefinition](https://w3id.org/linkml/EnumDefinition) -**EnumDefinition** instances are *instantiable*. +Instances of **EnumDefinition** are themselves *instantiable*. For example, a schema may have an enumeration with name "JobCode". 
This is an instance of an EnumDefinition, and can also be instantiated by different **permissible values** such as "Forklift Driver". + +### EnumDefinition: Normative subset metaslots + +Any instance *e* of an EnumDefinition may have assignments in any of the following normative metaslots: + +| Name | Cardinality and Range | Description | +| --- | --- | --- | +| [name](name.md) | 1..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the unique name of the element within the context of the schema **identifier** | +| [is_a](is_a.md) | 0..1
[Definition](Definition.md) | A primary parent class or slot from which inheritable metaslots are propagate... | +| [mixins](mixins.md) | 0..*
[Definition](Definition.md) | A collection of secondary parent classes or slots from which inheritable meta... | +| [rank](rank.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | the relative order in which the element occurs, lower values are given preced... | +| [enum_uri](enum_uri.md) | 0..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | URI of the enum that provides a semantic interpretation of the element in a l... | +| [code_set](code_set.md) | 0..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | the identifier of an enumeration code set | +| [pv_formula](pv_formula.md) | 0..1
[PvFormulaOptions](PvFormulaOptions.md) | Defines the specific formula to be used to generate the permissible values | +| [permissible_values](permissible_values.md) | 0..*
[PermissibleValue](PermissibleValue.md) | A list of possible values for a slot range | +| [include](include.md) | 0..*
[AnonymousEnumExpression](AnonymousEnumExpression.md) | An enum expression that yields a list of permissible values that are to be in... | +| [minus](minus.md) | 0..*
[AnonymousEnumExpression](AnonymousEnumExpression.md) | An enum expression that yields a list of permissible values that are to be su... | +| [inherits](inherits.md) | 0..*
[EnumDefinition](EnumDefinition.md) | An enum definition that is used as the basis to create a new enum | +| [reachable_from](reachable_from.md) | 0..1
[ReachabilityQuery](ReachabilityQuery.md) | Specifies a query for obtaining a list of permissible values based on graph r... | +| [matches](matches.md) | 0..1
[MatchQuery](MatchQuery.md) | Specifies a match query that is used to calculate the list of permissible val... | +| [concepts](concepts.md) | 0..*
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | A list of identifiers that are used to construct a set of permissible values | +| [abstract](abstract.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | Indicates the class or slot cannot be directly instantiated and is intended f... | +| [mixin](mixin.md) | 0..1
[xsd:boolean](http://www.w3.org/2001/XMLSchema#boolean) | Indicates the class or slot is not intended to be inherited from without being a... | +| [string_serialization](string_serialization.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | Used on a slot that stores the string serialization of the containing object | +| [id_prefixes](id_prefixes.md) | 0..*
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the identifier of this class or slot must begin with the URIs referenced by t... | +| [from_schema](from_schema.md) | 0..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | id of the schema that defined the element | + + + +### EnumDefinition: Normative subset UML + +A subset of the above normative slots are depicted as follows: ```mermaid classDiagram @@ -380,11 +485,43 @@ class PermissibleValue { } ``` -### TypeDefinition +## TypeDefinition Metaclass * metamodel documentation: [TypeDefinition](https://w3id.org/linkml/TypeDefinition) -**TypeDefinition** instances are *instantiable*. +Instances of **TypeDefinition** are themselves *instantiable*. For example, a schema might contain a TypeDefinition with name "PhoneNumber". This is an instance of TypeDefinition, and can itself be instantiated by individual phone numbers. + +### TypeDefinition: Normative subset metaslots + +Any instance *t* of a TypeDefinition may have assignments in any of the following normative metaslots: + +| Name | Cardinality and Range | Description | +| --- | --- | --- | +| [name](name.md) | 1..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the unique name of the element within the context of the schema **identifier** | +| [type_uri](type_uri.md) | 0..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | The uri that defines the possible values for the type definition | +| [typeof](typeof.md) | 0..1
[TypeDefinition](TypeDefinition.md) | Names a parent type | +| [base](base.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | python base type that implements this type definition | +| [repr](repr.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the name of the python object that implements this type definition | +| [pattern](pattern.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the string value of the slot must conform to this regular expression expresse... | +| [rank](rank.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | the relative order in which the element occurs, lower values are given preced... | +| [any_of](any_of.md) | 0..*
[AnonymousTypeExpression](AnonymousTypeExpression.md) | holds if at least one of the expressions hold | +| [exactly_one_of](exactly_one_of.md) | 0..*
[AnonymousTypeExpression](AnonymousTypeExpression.md) | holds if only one of the expressions hold | +| [none_of](none_of.md) | 0..*
[AnonymousTypeExpression](AnonymousTypeExpression.md) | holds if none of the expressions hold | +| [all_of](all_of.md) | 0..*
[AnonymousTypeExpression](AnonymousTypeExpression.md) | holds if all of the expressions hold | +| [union_of](union_of.md) | 0..*
[TypeDefinition](TypeDefinition.md) | indicates that the domain element consists exactly of the members of the elem... | +| [structured_pattern](structured_pattern.md) | 0..1
[PatternExpression](PatternExpression.md) | the string value of the slot must conform to the regular expression in the pa... | +| [implicit_prefix](implicit_prefix.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | Causes the slot value to be interpreted as a uriorcurie after prefixing with ... | +| [equals_string](equals_string.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the slot must have range string and the value of the slot must equal the spec... | +| [equals_string_in](equals_string_in.md) | 0..*
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the slot must have range string and the value of the slot must equal one of t... | +| [minimum_value](minimum_value.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | for slots with ranges of type number, the value must be equal to or higher th... | +| [maximum_value](maximum_value.md) | 0..1
[xsd:integer](http://www.w3.org/2001/XMLSchema#integer) | for slots with ranges of type number, the value must be equal to or lower than... | +| [id_prefixes](id_prefixes.md) | 0..*
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the identifier of this class or slot must begin with the URIs referenced by t... | +| [from_schema](from_schema.md) | 0..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | id of the schema that defined the element | + +### TypeDefinition: Normative subset UML + +A subset of the above normative slots are depicted as follows: + ```mermaid classDiagram @@ -395,43 +532,79 @@ TypeExpression "*" --> AnonymousTypeExpression: any_of TypeExpression "*" --> AnonymousTypeExpression: exactly_one_of TypeExpression "*" --> AnonymousTypeExpression: none_of TypeExpression "*" --> AnonymousTypeExpression: all_of +AnonymousTypeExpression --|> TypeExpression +TypeDefinition --|> TypeExpression class TypeDefinition { +TypeDefinitionName name +TypeDefinitionName typeof +TypeDefinitionName[] mixins +Uri uri +String base - +String repo + +String repr } TypeDefinition --|> TypeExpression: mixin TypeDefinition --|> Element: is_a - ``` -See below for the standard types included with LinkML. These can be extended using `typeof` +### Default Types -### ClassDefinitionReferences +LinkML includes a default schema of types -**ClassDefinitionReferences** are primitive elements that provide a way to reference a particular instances. +* Schema: [https://w3id.org/linkml/types.yaml](https://w3id.org/linkml/types.yaml). 
+* Documentation: [https://linkml.io/linkml-model/docs/#types](https://linkml.io/linkml-model/docs/#types) -ClassDefinitionReferences are not part of the asserted metamodel but are derived from derivation rules -- see next section +These are: -## Other Schema Elements +- Boolean (Bool) - A binary (true or false) value +- Date (XSDDate) - a date (year, month and day) in an idealized calendar +- Datetime (XSDDateTime) - The combination of a date and time +- Decimal (Decimal) - A real number with arbitrary precision that conforms to the xsd:decimal specification +- Double (float) - A real number that conforms to the xsd:double specification +- Float (float) - A real number that conforms to the xsd:float specification +- Integer (int) - An integer +- Ncname (NCName) - Prefix part of CURIE +- Nodeidentifier (NodeIdentifier) - A URI, CURIE or BNODE that represents a node in a model. +- Objectidentifier (ElementIdentifier) - A URI or CURIE that represents an object in the model. +- String (str) - A character string +- Time (XSDTime) - A time object represents a (local) time of day, independent of any particular day +- Uri (URI) - a complete URI +- Uriorcurie (URIorCURIE) - a URI or a CURIE + +The type schema can be imported, used directly, or used as a basis to extend new types + +## ClassDefinitionReference Metaclass + +**ClassDefinitionReferences** are primitive elements that provide a way to reference a particular instances. -### Prefixes +ClassDefinitionReferences are not part of the asserted schema but are derived from derivation rules -- see next section + +## Prefix Metaclass * metamodel documentation: [Prefix](https://w3id.org/linkml/Prefix) +A schema can contain any number of prefixes. Each prefix maps a short name such as "owl" to a URI. These are used to determine the **canonical URI** for any element. + +### Prefix: Normative subset metaslots + +| Name | Cardinality and Range | Description | +| --- | --- | --- | +| [prefix_prefix](prefix_prefix.md) | 0..1
[xsd:string](http://www.w3.org/2001/XMLSchema#string) | the nsname (sans ':' for a given prefix) | +| [prefix_reference](prefix_reference.md) | 1..1
[xsd:anyURI](http://www.w3.org/2001/XMLSchema#anyURI) | A URI associated with a given prefix | + + +### Prefix: Normative subset UML + ```mermaid classDiagram - SchemaDefinition "*"-->Prefix: prefixes + SchemaDefinition "*" --> "*" Prefix: prefixes class Prefix { +Ncname prefix_prefix +Uri prefix_reference } ``` -Example: + +### Prefix Example, Functional Syntax ```python SchemaDefinition( @@ -447,26 +620,170 @@ SchemaDefinition( ]) ``` -## Composition of Expressions +### Prefix Example, YAML -## Built in and defined types +```yaml +prefixes: + linkml: https://w3id.org/linkml/ + schema: http://schema.org/ + wgs: http://www.w3.org/2003/01/geo/wgs84_pos# + qudt: http://qudt.org/1.1/schema/qudt# +``` -LinkML has a number of types defined in the types model, documented [here](https://linkml.io/linkml-model/docs/#types) +## Complete Schema Example (Informative) + +For example, consider a schema that models representations of individual people and organizations they belong to may include a class definition `Person`, and slot definitions for `name`, `address`, `relationships` and so on. + +This might be depicted in UML as: + +```mermaid +classDiagram + Person "0" --> "*" Person: knows + class Person { + +String id + +String name + +Float height + +Date date_of_birth + +JobCode occupation + + } +``` + +This would have a YAML serialization (see section 6): + +```yaml +classes: + Person: + description: ... + slots: + - id + - name + - height + - date_of_birth + - occupation + - knows +slots: + id: + identifier: true + range: string + name: + range: string + date_of_birth: + range: date + height: + range: float + occupation: + range: JobCode + knows: + range: Person + multivalued: true +enums: + JobCode: + permissible_values: + ForkliftDriver: + ...: +types: + date: + ... 
+``` + +Because schemas are instances of the metamodel, this hypothetical schema may be serialized in functional instance syntax as follows: + +``` +SchemaDefinition( + id=String("http://example.org/organization"), + name=String("organization"), + prefixes=[ + Prefix(prefix_prefix=Ncname("linkml"), + prefix_reference=Uri("https://w3id.org/linkml/")), + Prefix(prefix_prefix=Ncname("org"), + prefix_reference=Uri("http://example.org/organization/")), + Prefix(prefix_prefix=Ncname("schema"), + prefix_reference=Uri("http://schema.org/")), + Prefix(prefix_prefix=Ncname("wgs"), + prefix_reference=Uri("http://www.w3.org/2003/01/geo/wgs84_pos#")), + Prefix(prefix_prefix=Ncname("qudt"), + prefix_reference=Uri("http://qudt.org/1.1/schema/qudt#")) + ], + default_prefix=String("org"), + imports=[ + Uriorcurie("linkml:types") + ], + classes=[ + ClassDefinition( + name=String("Person"), + slots=[ + String("id"), + String("name"), + String("height"), + String("date_of_birth"), + String("knows"), + String("occupation"), + ... + ] + ), + ClassDefinition( + name=String("Organization"), + slots=[ + String("id"), + ... + ] + ), + ... + ], + slots=[ + SlotDefinition( + name=String("id"), + identifier=Boolean(True), + description=String("..."), + range=String("String"), + ... + ), + SlotDefinition( + name=String("name"), + description=String("..."), + range=String("String"), + ... + ), + SlotDefinition( + name=String("occupation"), + description=String("..."), + range=String("JobCode"), + ... + ), + SlotDefinition( + name=String("date_of_birth"), + description=String("..."), + range=String("Date"), + ... + ), + SlotDefinition( + name=String("knows"), + description=String("..."), + range=String("Person"), + multivalued=Boolean(True), + ... + ) + ], + enums=[ + EnumDefinition( + name=String("JobCode"), + permissible_values=[...], + ) + ], + types=[ + TypeDefinition( + name=String("Date"), + ... + ), + TypeDefinition( + name=String("String"), + ...
+ ), + ] +) +``` -- Boolean (Bool) - A binary (true or false) value -- Date (XSDDate) - a date (year, month and day) in an idealized calendar -- Datetime (XSDDateTime) - The combination of a date and time -- Decimal (Decimal) - A real number with arbitrary precision that conforms to the xsd:decimal specification -- Double (float) - A real number that conforms to the xsd:double specification -- Float (float) - A real number that conforms to the xsd:float specification -- Integer (int) - An integer -- Ncname (NCName) - Prefix part of CURIE -- Nodeidentifier (NodeIdentifier) - A URI, CURIE or BNODE that represents a node in a model. -- Objectidentifier (ElementIdentifier) - A URI or CURIE that represents an object in the model. -- String (str) - A character string -- Time (XSDTime) - A time object represents a (local) time of day, independent of any particular day -- Uri (URI) - a complete URI -- Uriorcurie (URIorCURIE) - a URI or a CURIE diff --git a/specification/04derived-schemas.md b/specification/04derived-schemas.md index fb6ce844..74efa469 100644 --- a/specification/04derived-schemas.md +++ b/specification/04derived-schemas.md @@ -16,28 +16,37 @@ Derivations happen via *rules* that are specified below, using a set of convenie ## Conventions -We use `m` to denote the input or asserted schema (model), and `m*` to denote the derived schema +We use `m` to denote the input or asserted schema (model), and *mD* to denote the derived schema ## Functions -### Function: ClassIdentifier +### Function: GetIdentifierSlot -The function **ClassIdentifier**(`c`) takes a ClassDefinition or ClassDefinitionName as input and returns: +The function **GetIdentifierSlot**(`c`) takes a ClassDefinition or ClassDefinitionName as input and returns: -- the name of a derived attribute `s` in `c` where `s.identifier` is True in `m*` +- the name of a derived attribute `s` in `c` where `s.identifier` is True in *mD* - **None** if there is no such slot - An error if there are multiple such 
slots ### Function: Closure The function **Closure**(`x`, `s`) takes as input an element `x` and a metaslot `s` and returns the mathematical closure -of looking up `x.` +of `s` where `s` is treated as a relation between instances `i` and `j` that holds when either: + + - s is not multivalued: `i.s = j` + - s is multivalued: `j member-of i.s` The **ReflexiveClosure** includes `x` +### Function: Parents + +**Parents** itself is the union of `is_a` and `mixins`. + +> **Parents**(x) = `{e : e = x.is_a \/ e in x.mixins }` + ### Function: Ancestors -The function **Ancestors**(`x`) returns the **Closure** of the **Parents** function applied to `x`. **Parents** itself is the union of `is_a` and `mixins`. +The function **Ancestors**(`x`) returns the **Closure** of the **Parents** function applied to `x`. The function **ReflexiveAncestors** uses the **ReflexiveClosure**. @@ -47,9 +56,9 @@ The function **ReflexiveAncestors** uses the **ReflexiveClosure**. Each model imports zero to many imports, indicated by the **SchemaDefinition**.[imports](https://w3id.org/linkml/imports) metaslot. -`m*` is set to be the union of all schema elements from the **ReflexiveClosure** of `m.imports` +*mD* is set to be the union of all schema elements from the **ReflexiveClosure** of `m.imports` -When copying an element `x` from an import into `m*`, the name `x.name` must be unique - if the same name has been used in another model, the derivation procedure fails, and an error is thrown. +When copying an element `x` from an import into *mD*, the name `x.name` must be unique - if the same name has been used in another model, the derivation procedure fails, and an error is thrown. **Note**: If two or more models import the same target (e.g. `m1` imports `m2` and `m3` and `m2` imports `m3`), `m3` will be only be resolved once. @@ -62,7 +71,7 @@ Each imported module must be resolved - i.e the value of the import slot is mapp Each element in the schema as assigned a metaslot `fromschema` value. 
This is the value of the `id` of the schema in which that element is defined. -This is preserved over imports, such that if `m` imports `m2`, and `m2` defines a class `c`, then `m*[c].fromschema` = m2 +This is preserved over imports, such that if `m` imports `m2`, and `m2` defines a class `c`, then *mD*`[c].fromschema` = m2 ### Rule: Applicable Slot Names @@ -85,7 +94,7 @@ and ancestors of `c` - attributes asserted directly in `c.attributes` in the base schema - attributes derived from each SlotDefinition `s` in `c.slots` by - - looking up `s` in `m*.slots` and copying the slot-value assignments from these SlotDefinitions + - looking up `s` in *mD*`.slots` and copying the slot-value assignments from these SlotDefinitions - overriding these slot-value assignments with any slot-value assignments provided by `c.slot_usage[s]` - inheriting from parents of `c` using precedence rules - inheriting from parents of `s` @@ -154,21 +163,21 @@ representation of instance references in tree-based formats such as JSON. ### Rule: Each referenced entity must be present -Every **ClassDefinition**, **ClassDefinitionReference**, **SlotDefinitionReference**, **EnumDefinitionReference**, and **TypeDefinitionReference** must be resolvable within `m*` +Every **ClassDefinition**, **ClassDefinitionReference**, **SlotDefinitionReference**, **EnumDefinitionReference**, and **TypeDefinitionReference** must be resolvable within *mD* -However, not every element needs to be referenced. For example, it is valid to have a list of SlotDefinitions that are never used in `m*`. +However, not every element needs to be referenced. For example, it is valid to have a list of SlotDefinitions that are never used in *mD*. 
### ClassDefinition Structural Conformance Rules -Each `c` in `m*.classes` must conform to the rules below: +Each `c` in *mD*`.classes` must conform to the rules below: - `c` must be an instance of a **ClassDefinition** -- `c` must have a unique name `c.name`, and this name must not be shared by any other class or element in `m*` -- `c` lists permissible slots in `c.slots`, the range of this is a reference to a SlotDefinition in `m*.slots` +- `c` must have a unique name `c.name`, and this name must not be shared by any other class or element in *mD* +- `c` lists permissible slots in `c.slots`, the range of this is a reference to a SlotDefinition in *mD*`.slots` - `c` defines how slots are used in the context of `c` via a collection of SlotDefinitions specified in `c.slot_usage` - `c` may define local slots using `c.attributes`, the value of this is a. collection of SlotDefinitions - `c` may have certain boolean properties defined such as `c.mixin` and `c.abstract` -- `c` must have exactly one value for `c.class_uri` in `m*`, and the value must be an instance of the builtin type UriOrCurie +- `c` must have exactly one value for `c.class_uri` in *mD*, and the value must be an instance of the builtin type UriOrCurie - `c` may have parent ClassDefinitions defined via `c.is_a` and `c.mixins` - the value of `c.is_a` must be a ClassDefinitionReference - the value of `c.mixins` must be a collection of ClassDefinitonReferences @@ -178,14 +187,14 @@ Each `c` in `m*.classes` must conform to the rules below: ### SlotDefinition Structural Conformance Rules -Each `s` in `m*.slots` must conform to the rules below: +Each `s` in *mD*`.slots` must conform to the rules below: - `s` must be an instance of a **SlotDefinition** - `s` must have a unique name `s.name`, and this name must not be shared by any other type or element -- `s` must have a range specified via `s.range` in `m*` +- `s` must have a range specified via `s.range` in *mD* - `s` may have an assignment `s.identifier` 
which is True if `s` plays the role of a unique identifier - `s` may have certain boolean properties defined such as `s.mixin` and `s.abstract` -- `s` must have exactly one value for `s.slot_uri` in `m*`, and the value must be an instance of the builtin type UriOrCurie +- `s` must have exactly one value for `s.slot_uri` in *mD*, and the value must be an instance of the builtin type UriOrCurie - `s` may have parent SlotDefinitions defined via `s.is_a` and `s.mixins` - the value of `s.is_a` must be a **SlotDefinitionReference** - the value of `s.mixins` must be a collection of **SlotDefinitionReference**s @@ -194,17 +203,17 @@ Each `s` in `m*.slots` must conform to the rules below: ### TypeDefinition Structural Conformance Rules -Each `s` in `m*.types` must conform to the rules below: +Each `t` in *mD*`.types` must conform to the rules below: - `t` must be an instance of a **TypeDefinition** - `t` must have a unique name `t.name`, and this name must not be shared by any other type or element -- `t` must have a mapping to an xsd type provided via `t.uri` in `m*` +- `t` must have a mapping to an xsd type provided via `t.uri` in *mD* - `t` may have a parent type declared via `t.typeof` - `t` may have any number of additional slot-value assignments consistent with the validation rules provided here with the metamodel `MM` ### EnumDefinition Structural Conformance Rules -Each `e` in `m*.enums` must conform to the rules below: +Each `e` in *mD*`.enums` must conform to the rules below: - `e` must be an instance of a **EnumDefinition** - `e` must have a unique name `e.name`, and this name must not be shared by any other enum or element @@ -213,7 +222,7 @@ Each `e` in `m*.enums` must conform to the rules below: ### ClassDefinitionReference Structural Conformance Rules -Each `r` in `m*.class_references` must conform to the rules below: +Each `r` in *mD*`.class_references` must conform to the rules below: - `r` must be an instance of a **ClassDefinitionReference** - `r` must
have a unique name `r.name`, and this name must not be shared by any other type or element diff --git a/specification/05validation.md b/specification/05validation.md index b55044e5..bcb64fbb 100644 --- a/specification/05validation.md +++ b/specification/05validation.md @@ -2,12 +2,14 @@ **Validation** is a procedure that takes as input: -* A LinkML instance structure `i`, where `i` is to be validated -* A LinkML instance structure `root`. This may be the same as `i` -* A *derived* schema `m*` +* A LinkML instance `i`, where `i` is to be validated +* A LinkML instance `root`. This MUST contain `i` and MAY be the same as `i` +* A schema *m* The validation procedure will produce output that can be used to determine if the instance is *structurally and semantically valid* according to the schema. +The formal specification of the validation procedure takes as input a *derived* schema *mD*: + ```mermaid flowchart TD M[Asserted Schema] --> Derivation{Derivation Procedure} @@ -18,6 +20,8 @@ flowchart TD Mstar --> Validation ``` +Actual implementations may choose to perform this composition or work directly on the asserted schema. + The following holds for any validation procedure: - The output MUST include a boolean slot indicating whether the input can be demonstrated to be false @@ -28,6 +32,8 @@ The following holds for any validation procedure: - The procedure MAY restrict validation to defined subsets (profiles) of the Metamodel - The procedure SHOULD return in its payload an indication of which profile and version is used. +## Validation procedure for instances + The validation procedure is to first take the metaclass that is instantiated by the type of the instance `i`, and apply one of the 4 checks below, with each check performing its own sub-rules. The ClassDefinition check is *recursive*, checking each slot-value assignment. This means a check on any instance will always validate the @@ -37,23 +43,23 @@ full instance tree. 
Given an instance `i` of a ClassDefinition: -*ClassDefinitionName*( `SVs` ) +**ClassDefinitionName**( **Assignments** ) -Where `SVs` is a collection of length `N`, with index `i..N` and members `slot_i=value_i`, -and *ClassDefinitionName* is the name of a ClassDefinition in `m*`, such that `C=m*.classes[ClassDefinitionName]` +Where **Assignments** is a collection of length `N`, with index `1..N` and members `slot_i=value_i`, +and *ClassDefinitionName* is the name of a ClassDefinition in *mD*, such that `C` = *mD*`.classes[ClassDefinitionName]` -### CD Rule: Assignment values must be valid +### Rule: Assignment values must be valid -for each `slot=value` assignment in `SVs`, the validation procedure is performed on `value`, with +for each `slot=value` assignment in **Assignments**, the validation procedure is performed on `value`, with `root` remaining the same -### CD Rule: ClassDefinition instances must instantiate a class in the schema +### Rule: ClassDefinition instances must instantiate a class in the schema -*ClassDefinitionName* MUST be the name of a ClassDefinition in `m*` +*ClassDefinitionName* MUST be the name of a ClassDefinition in *mD* -`C` is assigned to be the value of `m*[ClassDefinitionName]` +`C` is assigned to be the value of *mD*`[ClassDefinitionName]` -`Atts` is assigned to be the value of `C.attributes` (see the previous section) +**Assignments** is assigned to be the value of `C.attributes` (see the previous section) `C` SHOULD have all the following properties: @@ -61,49 +67,49 @@ for each `slot=value` assignment in `SVs`, the validation procedure is performed - `C.abstract` SHOULD NOT be **True** - `C.mixin` SHOULD NOT be **True** -### CD Rule: identifiers must be unique +### Rule: identifiers must be unique We define a procedure **IdVal**(`i`) which yields the value of `i.` where `identifier_slot` -is the slot n `Atts` with metaslot assignment `identifier`=**True** +is the slot in **Assignments** with metaslot assignment `identifier`=**True** If
there is no such slot then `**IdVal**(`i`) is None and this check is ignored. `i` is invalid if there exists another instance `j` such that `j` is reachable from `root`, and **IdVal**(`i`)=**IdVal**(`j`) and `i` and `j` are distinct. -### CD Rule: All assignments must be to permitted slots +### Rule: All assignments must be to permitted slots For each `s=value` assignment in <*Assignment1*>, <*Assignment2*>, ..., <*AssignmentN*>: -- `s` must be in `Atts` +- `s` must be in **Assignments** ### Rule: All required slots must be specified -For each slot `s` in `Atts`, if `s.required=True`, then `i.` must be neither `None` nor the empty collection `[]` +For each slot `s` in **Assignments**, if `s.required=True`, then `i.` must be neither `None` nor the empty collection `[]` ### Rule: All recommended slots should be specified -For each slot `s` in `Atts`, if `s.recommended=True`, then `i.` should be neither `None` nor the empty collection `[]` +For each slot `s` in **Assignments**, if `s.recommended=True`, then `i.` should be neither `None` nor the empty collection `[]` If this condition is not met, this is considered a warning rather than invalidity ### Rule: Assigned values must conform to multivalued cardinality -For each slot `s` in `Atts`, +For each slot `s` in **Assignments**, * if `s.multivalued` is True, then `i.` must be a collection or None * If `s.multivalued` is False, then `i.` must not be a collection ### Rule: values should be within stated bounds -For each slot `s` in `Atts`, +For each slot `s` in **Assignments**, * if `s.maximum_value` is not None, then `i.` must be a number and must be less that or equal to the maximum value * if `s.minimum_value` is not None, then `i.` must be a number and must be greater that or equal to the minimum value ### Rule: values should equal evaluable expression -For each slot `s` in `Atts`, if `s.equals_expression` is not None, then `i.` must equal +For each slot `s` in **Assignments**, if `s.equals_expression` is not None, 
then `i.` must equal the value of `Eval(s.equals_expression)`. See section on expression language for details of syntax. @@ -111,7 +117,7 @@ Note: this rule can be executed in inference mode ### Rule: values should equal string_serialization -For each slot `s` in `Atts`, if `s.string_serialization` is not None, then `i.` must equal +For each slot `s` in **Assignments**, if `s.string_serialization` is not None, then `i.` must equal the value of `Stringify(s.string_serialization)`. See section on expression language for details of syntax. @@ -121,7 +127,7 @@ Note: this rule can be executed in inference mode ### Range class instantiation check -For each slot `s` in `Atts`, if `i.` is not None, and `s.range` is in `m*.classes`, +For each slot `s` in **Assignments**, if `i.` is not None, and `s.range` is in `m*.classes`, then `s.range` must be in `ReflexiveAncestors(Type(i.))` Additional checks MAY be performed based on whether `s.inlined` is True @@ -131,22 +137,7 @@ then `s.range` must be in `ReflexiveAncestors(Type(i.))` * `i.` SHOULD be a Reference * OR `i.` instantiates a class `R` such that R has no slot `rs` that is declared to be an identifier. i.e. `rs.identifier = True` -### Range type check - -For each slot `s` in `Atts`, if `i.` is not None, and `s.range` is in `m*.types`, -where `i. 
= *T*( **AtomicValue** )` must match `s.range`, - -here `T.uri` is used to determine the type: - -- for xsd floats, doubles, and decimals, AtomicValue must be a decimal- for xsd floats, doubles, and decimals, AtomicValue must be a decimal -- for xsd ints, AtomicValue must be an Integer -- for xsd dates, datetimes, and times, AtomicValue must be a string conforming to the relevant ISO type -- for xsd booleans, AtomicValue must be True or False - -### Range enum check -For each slot `s` in `Atts`, if `i.` is not None, and `s.range` is in `m*.enums`, -then `i.` must be equal to `pv.text` for some pv in `m*.enums[s.range]` ### Boolean combinations of expressions @@ -175,7 +166,7 @@ In all cases, the semantics are as follows: ### range expression checks -For each slot `s` in `Atts`, if `i.` is not None, and `RE = s.range_expression` is not None, then a check +For each slot `s` in **Assignments**, if `i.` is not None, and `RE = s.range_expression` is not None, then a check **CE**(`x`) is performed on `i.` ### Rule evaluation @@ -189,6 +180,23 @@ For each rule `r` in *C*.rules: ### type designator checks +## Validation of TypeDefinitions + +For each slot `s` in **Assignments**, if `i.` is not None, and `s.range` is in `m*.types`, +where `i. 
= *T*( **AtomicValue** )` must match `s.range`, + +here `T.uri` is used to determine the type: + +- for xsd floats, doubles, and decimals, AtomicValue must be a decimal +- for xsd ints, AtomicValue must be an Integer +- for xsd dates, datetimes, and times, AtomicValue must be a string conforming to the relevant ISO type +- for xsd booleans, AtomicValue must be True or False + +## Validation of EnumDefinitions + +For each slot `s` in **Assignments**, if `i.` is not None, and `s.range` is in `m*.enums`, +then `i.` must be equal to `pv.text` for some pv in `m*.enums[s.range]` + ## Inference of new values diff --git a/specification/06mapping.md b/specification/06mapping.md index 3cf1e1b1..2fe0b205 100644 --- a/specification/06mapping.md +++ b/specification/06mapping.md @@ -3,12 +3,17 @@ This section describes how LinkML instances are translated to different formats and data models. - Conversion *from* LinkML instances to another format is called *serialization* or *dumping* -- Conversion *to* LinkML instances from another format is called *parsing* or *loading* +- Conversion *to* LinkML instances from another format is called *parsing*, *loading*, or *deserialization* The reference implementation is the [linkml-runtime](https://github.com/linkml/linkml-runtime/) but other implementations that conform to this specification are valid. ## Translation of instances to JSON or YAML +Here we define a mapping of LinkML instances to JSON. + +As JSON is a subset of YAML, this can also be used to load and dump from YAML. This is the canonical YAML mapping. We leave open +the possibility of a *direct* YAML conversion in future which makes use of YAML tags to encode typing information.
+ - Serialization to JSON takes as input: - a (root) instance - Parsing from JSON takes as input: @@ -20,15 +25,16 @@ The reference implementation is the [linkml-runtime](https://github.com/linkml/l Given an instance `i` of a ClassDefinition: -*ClassDefinitionName*( `s1=v1`, ..., `sn=v2` ) +*ClassDefinitionName*( `s1=v1`, ..., `sn=vn` ) -This is translated to +This is translated to a JSON *object* ```json { Tr(s1) = Tr(v1), - ..., - Tr(s2) = Tr(v2) + Tr(s2) = Tr(v2), + ..., + Tr(sn) = Tr(vn) } ``` @@ -43,9 +49,11 @@ set the target ClassDefinitionName ### Instances of None -The parent slot is omitted +None is mapped to JSON nulls. + +It is conventional to omit an assignment whose value is None/null when serializing. The canonical serialization MUST omit these, but a parser MUST accept None/null values that are explicitly provided. -### Instances of Collection +### Instances of Collections of InstancesOfClass If the parent slot `s.inlined_as_list=True` @@ -69,6 +77,19 @@ otherwise: ] ``` +### Instances of other Collections + +These are always translated to lists: + +``` +[ + Tr(member_1), + ...
+ Tr(member_n), +] +``` + + ### Instances of TypeDefinition The direct value is used @@ -100,15 +121,15 @@ LinkML provides standard types: - Curie - Uri -- CurieOrUri +- Curieoruri The syntax for a CURIE is defined by [W3C CURIE Syntax 1.0](https://www.w3.org/TR/curie/) -**curie** := [ [ **prefix** ] ':' ] **reference** +> **curie** := [ [ **prefix** ] ':' ] **reference** -**prefix** := **[NCName](https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCName)** +> **prefix** := **[NCName](https://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCName)** -**reference** := **[irelative-ref](https://www.ietf.org/rfc/rfc3987.txt)** +> **reference** := **[irelative-ref](https://www.ietf.org/rfc/rfc3987.txt)** We define a function **CurieToUri**(`x`) that maps (expands) a CurieOrUri to a Uri diff --git a/specification/index.md b/specification/index.md new file mode 100644 index 00000000..6b091d18 --- /dev/null +++ b/specification/index.md @@ -0,0 +1,11 @@ +# LinkML Specification + +## Table of Contents + +- [Preamble](00preamble.md) +- [Introduction](01introduction.md) +- [Instances](02instances.md) +- [Schemas](03schemas.md) +- [Derived Schemas](04derived-schemas.md) +- [Validation](05validation.md) +- [Mapping](06mapping.md) diff --git a/targets.sh b/targets.sh deleted file mode 100644 index 0fb00aae..00000000 --- a/targets.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -# Generate a variable that establishes all of the target directories -export TGTS="docs graphql json jsonld jsonschema owl rdf shex" diff --git a/utils/get-value.sh b/utils/get-value.sh new file mode 100755 index 00000000..bc102360 --- /dev/null +++ b/utils/get-value.sh @@ -0,0 +1,4 @@ +#!/bin/sh +# get the value of a key in the about.yaml file +# https://stackoverflow.com/questions/1221833/pipe-output-and-capture-exit-status-in-bash +grep $1 about.yaml | sed "s/$1:[[:space:]]//" ; test ${PIPESTATUS[0]} -eq 0