Skip to content

Commit

Permalink
Merge pull request #33 from w3c/jsonld-postprocessing
Browse files Browse the repository at this point in the history
JSON-LD @context postprocessing
  • Loading branch information
mahdanoura authored Sep 16, 2024
2 parents cdb1896 + 669652f commit 2ffd89a
Show file tree
Hide file tree
Showing 6 changed files with 228 additions and 109 deletions.
93 changes: 87 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import json
import subprocess

from linkml.generators.jsonschemagen import JsonSchemaGenerator
Expand All @@ -7,7 +8,12 @@
from linkml.generators.jsonldcontextgen import ContextGenerator
from linkml.generators.docgen import DocGenerator
from linkml.generators.linkmlgen import LinkmlGenerator
from linkml_runtime.utils.schemaview import SchemaView
from linkml_runtime.linkml_model.meta import AnonymousSlotExpression
from pathlib import Path
from pyld import jsonld

from linkml_runtime.linkml_model.meta import SlotDefinition

RESOURCES_PATH = Path('resources')
GENS_PATH = RESOURCES_PATH / 'gens'
Expand All @@ -25,28 +31,102 @@ def generate_docs():
doc_generator = DocGenerator(YAML_SCHEMA_PATH, mergeimports=False)
doc_generator.serialize(directory=str(DOCDIR))


#camelCase conversion of class names in the generated context file
# updated_context = {}
# for cn, value in generated_context.items():
# camel_case_key = cn[0].lower() + cn[1:]
# if value != {}:
# updated_context[camel_case_key] = value
# serialized_schema_json['@context'] = updated_context
# generated_context = updated_context
def post_process_jsonld_context(schema_view: "SchemaView", serialized_schema: str) -> str:
    """Adjust a generated JSON-LD context using slot metadata from the schema.

    For every slot returned by ``schema_view.all_slots()`` the term definition
    with the same name under ``@context`` is updated as follows:

    * Slots whose range is ``langString`` (directly, or through an
      ``exactly_one_of`` option when the slot has no range of its own) get
      ``"@container": "@language"`` and lose their ``@type``.
    * Slots with ``in_language`` get that value as ``@language``.
    * Inlined, multivalued slots that are not inlined as a list get
      ``"@container": "@index"``, a ``@type`` of ``@id`` when none is present
      yet and, when ``instantiates`` is set, that value as ``@index``.
    * Slots whose ``exactly_one_of`` options all share one range get
      ``"@type": "xsd:<range>"``; a warning is printed when the ranges
      differ. Slots without ``exactly_one_of`` whose range equals the schema
      default range (``"string"`` when the schema declares none) get
      ``"@type": "xsd:<default range>"``.
    * Multivalued slots that still have no ``@container`` get
      ``"@container": "@set"`` and lose their ``@type``.

    Slots without a term definition start from an empty definition, which is
    added to the context. Term definitions that are not JSON objects (for
    example a plain IRI string) are left untouched.

    Args:
        schema_view: View over the LinkML schema the context was generated
            from.
        serialized_schema: JSON text of the generated JSON-LD context.

    Returns:
        The updated document serialized as JSON with an indent of 3.
    """
    document = json.loads(serialized_schema)
    # setdefault (rather than get) keeps the mapping attached to the document,
    # so the edits below are not lost when '@context' was missing.
    context = document.setdefault('@context', {})
    default_range = schema_view.schema.default_range or "string"

    for slot in schema_view.all_slots().values():
        slot_name = slot.name
        entry = context.get(slot_name, {})
        if not isinstance(entry, dict):
            # A plain string term definition cannot carry the keywords set
            # below; leave it exactly as generated.
            continue

        # Multi-language support: language maps instead of typed values.
        is_langstring = slot.range == 'langString'
        is_exactly_one_language = slot.range is None and any(
            expr.range == 'langString'
            for expr in slot.exactly_one_of
            if isinstance(expr, AnonymousSlotExpression)
        )
        if is_langstring or is_exactly_one_language:
            entry['@container'] = '@language'
            entry.pop('@type', None)

        # Slots with a specific encoded language.
        if slot.in_language:
            entry['@language'] = slot.in_language

        # Inlined and multivalued (but not list-inlined) slots are dictionaries.
        if slot.inlined and slot.multivalued and not slot.inlined_as_list:
            entry['@container'] = '@index'
            # Only default the type; an already generated '@type' is kept.
            entry.setdefault('@type', '@id')
            if slot.instantiates:
                entry['@index'] = slot.instantiates

        # exactly_one_of expressions, otherwise the schema default range.
        options = getattr(slot, 'exactly_one_of', None)
        if options:
            ranges = [opt['range'] for opt in options if 'range' in opt]
            if len(set(ranges)) == 1:
                entry["@type"] = f"xsd:{ranges[0]}"
            else:
                print(f"Warning: Slot {slot_name} has different ranges")
        elif slot.range == default_range:
            entry["@type"] = f"xsd:{default_range}"

        # Multivalued slots which do not already have a @container become a @set.
        if slot.multivalued and '@container' not in entry:
            entry['@container'] = '@set'
            entry.pop('@type', None)

        # NOTE: slot and class aliases are not applied to term names here.
        context[slot_name] = entry

    return json.dumps(document, indent=3)


def main(generate_docs_flag, serve_docs_flag):
if not YAML_SCHEMA_PATH.exists():
print(f"LinkML schema file does not exist: {YAML_SCHEMA_PATH}")
return
linkml_schema_view = SchemaView(YAML_SCHEMA_PATH, merge_imports=True)
# TODO: add pre processing for LinkML if needed
for generator in GENERATORS:
output_dir = GENS_PATH / generator
output_dir.mkdir(parents=True, exist_ok=True)
if generator == 'jsonschema':
# json_schema_generator = JsonSchemaGenerator(yaml_content, top_class="Thing")
json_schema_generator = JsonSchemaGenerator(YAML_SCHEMA_PATH, mergeimports=True)
json_schema_generator = JsonSchemaGenerator(linkml_schema_view.schema, mergeimports=True)
(output_dir / 'jsonschema.json').write_text(json_schema_generator.serialize())
elif generator == 'shacl':
shacl_generator = ShaclGenerator(YAML_SCHEMA_PATH, mergeimports=False, closed=True, suffix='Shape')
shacl_generator = ShaclGenerator(linkml_schema_view.schema, mergeimports=False, closed=True, suffix='Shape')
(output_dir / 'shapes.shacl.ttl').write_text(shacl_generator.serialize())
elif generator == 'owl':
owl_generator = OwlSchemaGenerator(YAML_SCHEMA_PATH,)
owl_generator = OwlSchemaGenerator(linkml_schema_view.schema,)
(output_dir / 'ontology.owl.ttl').write_text(owl_generator.serialize())
elif generator == 'jsonldcontext':
context_generator = ContextGenerator(YAML_SCHEMA_PATH, mergeimports=False)
(output_dir / 'context.jsonld').write_text(context_generator.serialize())
context_generator = ContextGenerator(linkml_schema_view.schema, mergeimports=True)
(output_dir / 'context.jsonld').write_text(post_process_jsonld_context(linkml_schema_view,
context_generator.serialize()))
elif generator == 'linkml':
linkml_generator = LinkmlGenerator(YAML_SCHEMA_PATH, mergeimports=True, format='yaml', output='linkml.yaml')
linkml_generator = LinkmlGenerator(linkml_schema_view.schema, mergeimports=True, format='yaml', output='linkml.yaml')
(output_dir / 'linkml.yaml').write_text(linkml_generator.serialize())
else:
print(f"Unknown generator: {generator}")
Expand All @@ -69,3 +149,4 @@ def main(generate_docs_flag, serve_docs_flag):
help='Boolean for serving the generated documentation.')
args = parser.parse_args()
main(args.local_docs, args.serve_docs)

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ include = ["README.md", "src/thing_description_schema/schema", "project"]

requires-python = ">=3.12"
dependencies = [
"linkml-runtime>=1.8.1",
"linkml-runtime>=1.8.2",
"linkml>=1.8.2",
"mkdocs-mermaid2-plugin>=1.1.1",
"mkdocs-material>=9.5.32",
Expand All @@ -23,4 +23,4 @@ requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build]
only-include = ["main.py"]
only-include = ["main.py"]
49 changes: 25 additions & 24 deletions resources/schemas/hctl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ title: hctl
version: "1.1-11-June-2024"
description: |-
LinkML schema for modelling the TD Hypermedia Control information model, in particular links and forms.
contributors: Mahda_Noura
license: MIT
see_also:
- https://www.w3.org/TR/wot-thing-description11/
Expand Down Expand Up @@ -40,26 +41,32 @@ classes:
Link:
class_uri: hctl:Link
description: >-
A link can be viewed as a statement of the form link context that has a relation type resource at link target, where the optional target attributes may further describe the resource.
A link can be viewed as a statement of the form link context that has a relation type resource at link target,
where the optional target attributes may further describe the resource.
attributes:
type:
description: Target attribute providing a hint indicating what the media type [IANA-MEDIA-TYPES] of the result of dereferencing the link should be.
slot_uri: hctl:hintsAtMediaType
relation:
rel:
description: >-
A link relation type identifies the semantics of a link.
slot_uri: hctl:hasRelationType
anchor:
description: >-
By default, the context, or anchor, of a link conveyed in the Link header field is the URL of the representation it is associated with, as defined in RFC7231, Section 3.1.4.1, and is serialized as a URI.
range: uriorcurie
By default, the context, or anchor, of a link conveyed in the Link header field is the URL of the
representation it is associated with, as defined in RFC7231, Section 3.1.4.1, and is serialized as a URI.
slot_uri: hctl:hasAnchor
range: uri
sizes:
description: >-
Target attribute that specifies one or more sizes for the referenced icon. Only applicable for relation type 'icon'. The value pattern follows {Height}x{Width} (e.g., \"16x16\", \"16x16 32x32\").
Target attribute that specifies one or more sizes for the referenced icon. Only applicable for relation type
'icon'. The value pattern follows {Height}x{Width} (e.g., \"16x16\", \"16x16 32x32\").
slot_uri: hctl:hasSizes
hreflang:
description: >-
The hreflang attribute specifies the language of a linked document. The value of this must be a valid language tag [[BCP47]].
multivalued: true
pattern: "^(((([A-Za-z]{2,3}(-([A-Za-z]{3}(-[A-Za-z]{3}){0,2}))?)|[A-Za-z]{4}|[A-Za-z]{5,8})(-([A-Za-z]{4}))?(-([A-Za-z]{2}|[0-9]{3}))?(-([A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3}))*(-([0-9A-WY-Za-wy-z](-[A-Za-z0-9]{2,8})+))*(-(x(-[A-Za-z0-9]{1,8})+))?)|(x(-[A-Za-z0-9]{1,8})+)|((en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR|sgn-BE-NL|sgn-CH-DE)|(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka|zh-min|zh-min-nan|zh-xiang)))$"
The hreflang attribute specifies the language of a linked document. The value of this must be a valid
language tag [[BCP47]].
slot_uri: hctl:hasHreflang
slots:
- href
Form:
Expand All @@ -81,20 +88,13 @@ classes:
Content codings are primarily used to allow a representation to be compressed or otherwise usefully transformed
without losing the identity of its underlying media type and without loss of information. Examples of content
coding include \"gzip\", \"deflate\", etc.
securityDefinitions:
description: >-
Set of security definition names, chosen from those defined in securityDefinitions. These must all be satisfied for access to resources.
slot_uri: td:hasSecurityConfiguration
exactly_one_of:
- range: string
- range: string
multivalued: true
slot_uri: hctl:forContentCoding
scopes:
description: >-
TODO Check, was not in hctl ontology, if not could be source of discrepancy.
Set of authorization scope identifiers provided as an array. These are provided in tokens returned by an
authorization server and associated with forms in order to identify what resources a client may access and how.
The values associated with a form SHOULD be chosen from those defined in an OAuth2SecurityScheme active on that form.
slot_uri: hctl:scopes
exactly_one_of:
- range: string
- range: string
Expand All @@ -104,8 +104,8 @@ classes:
This optional term can be used if, e.g., the output communication metadata differ from input metadata (e.g., output contentType differ from the
input contentType). The response name contains metadata that is only valid for the response messages.
slot_uri: hctl:returns
aliases:
- returns
exact_mappings:
- hctl:returns
range: ExpectedResponse
additionalResponse:
description: >-
Expand All @@ -115,8 +115,8 @@ classes:
slot_uri: hctl:additionalReturns
multivalued: true
range: AdditionalExpectedResponse
aliases:
- additional returns
exact_mappings:
- hctl:additionalReturns
subprotocol:
slot_uri: hctl:forSubProtocol
description: >-
Expand All @@ -134,6 +134,7 @@ classes:
multivalued: true
slots:
- href
- security
ExpectedResponse:
class_uri: hctl:ExpectedResponse
description: >-
Expand Down Expand Up @@ -174,10 +175,10 @@ slots:
description: >-
Target IRI of a link or submission target of a Form.
slot_uri: hctl:hasTarget
aliases:
- target
required: true
range: uriorcurie
range: uri
exact_mappings:
- hctl:target

enums:
SubProtocolTypes:
Expand Down
Loading

0 comments on commit 2ffd89a

Please sign in to comment.