Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Forward RDF context between frontend and backend representations (Metadata and mlc.Metadata). #335

Merged
merged 4 commits into from
Nov 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,10 @@ jobs:
- name: Install library
run: pip install -r requirements.txt

- name: Install mlcroissant
run: sudo apt-get install -y libgraphviz-dev && pip install .[dev]
working-directory: ./python/mlcroissant

- name: PyTest
run: pytest .

Expand Down
2 changes: 2 additions & 0 deletions python/mlcroissant/mlcroissant/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from mlcroissant._src.structure_graph.nodes.file_object import FileObject
from mlcroissant._src.structure_graph.nodes.file_set import FileSet
from mlcroissant._src.structure_graph.nodes.metadata import Metadata
from mlcroissant._src.structure_graph.nodes.rdf import Rdf
from mlcroissant._src.structure_graph.nodes.record_set import RecordSet
from mlcroissant._src.structure_graph.nodes.source import Extract
from mlcroissant._src.structure_graph.nodes.source import FileProperty
Expand All @@ -29,6 +30,7 @@
"FileSet",
"GenerationError",
"Metadata",
"Rdf",
"Records",
"RecordSet",
"Source",
Expand Down
20 changes: 17 additions & 3 deletions wizard/core/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class FileObject:
encoding_format: str | None = None
sha256: str | None = None
df: str | None = None
rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)


@dataclasses.dataclass
Expand All @@ -43,6 +44,7 @@ class FileSet:
encoding_format: str | None = ""
includes: str | None = ""
name: str = ""
rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)


@dataclasses.dataclass
Expand All @@ -52,8 +54,9 @@ class Field:
name: str | None = None
description: str | None = None
data_types: str | list[str] | None = None
source: mlc.nodes.Source | None = None
references: mlc.nodes.Source | None = None
source: mlc.Source | None = None
rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)
references: mlc.Source | None = None


@dataclasses.dataclass
Expand All @@ -65,6 +68,7 @@ class RecordSet:
is_enumeration: bool | None = None
key: str | list[str] | None = None
fields: list[Field] = dataclasses.field(default_factory=list)
rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)


@dataclasses.dataclass
Expand All @@ -78,6 +82,7 @@ class Metadata:
url: str = ""
distribution: list[FileObject | FileSet] = dataclasses.field(default_factory=list)
record_sets: list[RecordSet] = dataclasses.field(default_factory=list)
rdf: mlc.Rdf = dataclasses.field(default_factory=mlc.Rdf)

def __bool__(self):
return self.name != "" and self.url != ""
Expand Down Expand Up @@ -124,6 +129,7 @@ def to_canonical(self) -> mlc.Metadata:
content_url=file.content_url,
encoding_format=file.encoding_format,
content_size=file.content_size,
rdf=file.rdf,
sha256=file.sha256,
)
)
Expand All @@ -137,6 +143,7 @@ def to_canonical(self) -> mlc.Metadata:
description=field.description,
data_types=field.data_types,
source=field.source,
rdf=field.rdf,
references=field.references,
)
)
Expand All @@ -147,6 +154,7 @@ def to_canonical(self) -> mlc.Metadata:
key=record_set.key,
is_enumeration=record_set.is_enumeration,
fields=fields,
rdf=record_set.rdf,
)
)
return mlc.Metadata(
Expand All @@ -156,11 +164,12 @@ def to_canonical(self) -> mlc.Metadata:
description=self.description,
url=self.url,
distribution=distribution,
rdf=self.rdf,
record_sets=record_sets,
)

@classmethod
def from_canonical(cls, canonical_metadata: mlc.nodes.Metadata) -> Metadata:
def from_canonical(cls, canonical_metadata: mlc.Metadata) -> Metadata:
distribution = []
for file in canonical_metadata.distribution:
if isinstance(file, mlc.FileObject):
Expand All @@ -172,6 +181,7 @@ def from_canonical(cls, canonical_metadata: mlc.nodes.Metadata) -> Metadata:
content_size=file.content_size,
encoding_format=file.encoding_format,
content_url=file.content_url,
rdf=file.rdf,
sha256=file.sha256,
)
)
Expand All @@ -182,6 +192,7 @@ def from_canonical(cls, canonical_metadata: mlc.nodes.Metadata) -> Metadata:
contained_in=file.contained_in,
description=file.description,
encoding_format=file.encoding_format,
rdf=file.rdf,
)
)
record_sets = []
Expand All @@ -194,6 +205,7 @@ def from_canonical(cls, canonical_metadata: mlc.nodes.Metadata) -> Metadata:
description=field.description,
data_types=field.data_types,
source=field.source,
rdf=field.rdf,
references=field.references,
)
)
Expand All @@ -204,6 +216,7 @@ def from_canonical(cls, canonical_metadata: mlc.nodes.Metadata) -> Metadata:
is_enumeration=record_set.is_enumeration,
key=record_set.key,
fields=fields,
rdf=record_set.rdf,
)
)
return cls(
Expand All @@ -213,5 +226,6 @@ def from_canonical(cls, canonical_metadata: mlc.nodes.Metadata) -> Metadata:
license=canonical_metadata.license,
url=canonical_metadata.url,
distribution=distribution,
rdf=canonical_metadata.rdf,
record_sets=record_sets,
)
2 changes: 1 addition & 1 deletion wizard/cypress/e2e/loadCroissant.cy.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ describe('Wizard loads Croissant without Error', () => {
cy.readFile(path.join(downloadsFolder, "croissant.json"))
.then((downloadedFile) => {
downloadedFile = JSON.stringify(downloadedFile)
return downloadedFile.replaceAll("https://www.wikidata.org/wiki/", "wd:").replace("ml:transform\"", "ml:transform\",\"wd\":\"https://www.wikidata.org/wiki/\"")
return downloadedFile
})
.should('deep.equal', JSON.stringify(fileContent))
})
Expand Down
12 changes: 6 additions & 6 deletions wizard/views/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def render_jsonld():
distribution = []
for file in croissant.distribution:
distribution.append(
mlc.nodes.FileObject(
mlc.FileObject(
name=file.name,
description=file.description,
content_url=file.content_url,
Expand All @@ -26,26 +26,26 @@ def render_jsonld():
fields = []
for _, field in record_set.get("fields", pd.DataFrame()).iterrows():
fields.append(
mlc.nodes.Field(
mlc.Field(
name=field["name"],
description=field["description"],
data_types=field["data_type"],
source=mlc.nodes.Source(
source=mlc.Source(
uid=file.name,
node_type="distribution",
extract=mlc.nodes.Extract(column=field["name"]),
extract=mlc.Extract(column=field["name"]),
),
)
)
record_sets.append(
mlc.nodes.RecordSet(
mlc.RecordSet(
name=record_set["name"],
description=record_set["description"],
fields=fields,
)
)
if croissant.metadata:
metadata = mlc.nodes.Metadata(
metadata = mlc.Metadata(
name=croissant.metadata.name,
citation=croissant.metadata.citation,
license=croissant.metadata.license,
Expand Down