Skip to content
This repository has been archived by the owner on Oct 2, 2024. It is now read-only.

Docs/end-to-end-tutorial #52

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
e58a948
test: add integration tests for updating records
burtenshaw Mar 26, 2024
870ab4f
feat: add basic update logic to records api
burtenshaw Mar 26, 2024
e4b6330
feat: implement update filter logic in dataset records
burtenshaw Mar 26, 2024
ecc004c
fix: excess import in suggestion model
burtenshaw Mar 27, 2024
22ede25
feat: add from model class method to response resource
burtenshaw Mar 27, 2024
6dd98e7
feat: implement suggestion as complete resource with from_model metho…
burtenshaw Mar 27, 2024
9d2dfe3
feat: implement Record as resource with containers
burtenshaw Mar 27, 2024
4e6f383
refactor: restructure records in submodules
burtenshaw Mar 27, 2024
f4af443
refactor: use RecordModel and ResponseModel in record api
burtenshaw Mar 27, 2024
2d6eb00
test: update tests to use fields and suggestions attributes
burtenshaw Mar 27, 2024
e165e00
feat: implement fields and suggestions attributes within RecordFields…
burtenshaw Mar 27, 2024
f3fa7f9
chore: expand record ingestion function with logging and validation
burtenshaw Mar 27, 2024
1e11a23
fix: catch missing datasets in erroneous suggestions
burtenshaw Mar 27, 2024
ecf8186
chore: document and refactor record resource
burtenshaw Mar 27, 2024
ff8eb41
chore: refactor and delete submodule records.utils
burtenshaw Mar 27, 2024
cba263e
chore: delete redundant questions module
burtenshaw Mar 27, 2024
3c813f1
chore: licensing in records module
burtenshaw Mar 27, 2024
c85f3ca
test: revise test for new responses as attribute model
burtenshaw Mar 27, 2024
1e7c890
feat: implement sorter in ingestion function to make either response …
burtenshaw Mar 27, 2024
7211cfe
feat: add flags for as_suggestion to DatasetRecords specify suggestion…
burtenshaw Mar 27, 2024
95edbba
feat: add core properties to response resource
burtenshaw Mar 27, 2024
8358471
chore: log dataset name in add and update records
burtenshaw Apr 2, 2024
c0ff4ff
chore: reduce logging every record
burtenshaw Apr 2, 2024
3285d67
chore: naming and formatting in Record
burtenshaw Apr 2, 2024
0992339
fix: question_name typing in suggestion
burtenshaw Apr 2, 2024
fcd79d2
Merge branch 'feature/update-records' into feature/add-records-with-r…
burtenshaw Apr 2, 2024
623e3d7
chore: delete excess dataset_records code from dataset module
burtenshaw Apr 2, 2024
d8bfc92
chore: tidy excess imports in record model
burtenshaw Apr 2, 2024
d4be07c
test: add testing for export records to generic python structures
burtenshaw Apr 2, 2024
2d59c24
feat: implement export mixin for generic python structures
burtenshaw Apr 2, 2024
993b602
feat: integrate export mixin with dataset records
burtenshaw Apr 2, 2024
fffc7fe
feat: expose record and metadata property
burtenshaw Apr 2, 2024
016b657
feat: add serialize to response resource
burtenshaw Apr 2, 2024
47134ad
feat: add serialize method to suggestion resource
burtenshaw Apr 2, 2024
d8cb7e0
docs: add documentation to _generic export mixin
burtenshaw Apr 3, 2024
bed6850
Merge branch 'main' into feature/pull-records
burtenshaw Apr 8, 2024
234329a
fix: field id is not serialized in model
burtenshaw Apr 10, 2024
6b45602
feat: add len attributes to resources for representation
burtenshaw Apr 10, 2024
14be0a1
feat: implement html representation for notebooks
burtenshaw Apr 10, 2024
6467835
feat: use html repr in client resource series
burtenshaw Apr 10, 2024
b9018d3
feat: update end to end tutorial for refactor
burtenshaw Apr 10, 2024
d568e20
feat: include mapping in notebook
burtenshaw Apr 10, 2024
a2a23d1
docs: update prose in end to end notebook
burtenshaw Apr 10, 2024
e92d008
Merge branch 'fix/ingest-records-with-mapped-fields' into docs/end-to…
burtenshaw Apr 11, 2024
de79b2d
fix: simplify html_repr
burtenshaw Apr 11, 2024
a88da58
chore: update notebook with mapping
burtenshaw Apr 11, 2024
da1bb94
Merge branch 'fix/ingest-records-with-mapped-fields' into docs/end-to…
burtenshaw Apr 15, 2024
3133219
Merge branch 'fix/ingest-records-with-mapped-fields' into docs/end-to…
burtenshaw Apr 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
457 changes: 212 additions & 245 deletions docs/_source/tutorials/end-to-end-argilla-tutorial.ipynb

Large diffs are not rendered by default.

58 changes: 58 additions & 0 deletions src/argilla_sdk/_helpers/_resource_repr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from html import escape
from typing import Any, Dict

from IPython.display import HTML


# Display settings for the HTML tables, keyed by resource class name.
#   "columns":    model fields shown for each resource
#   "table_name": heading rendered above the table
#   "len_column": optional; this column's cell holds len(resource) instead of
#                 the value found in the dumped model
RESOURCE_REPR_CONFIG = {
    "Dataset": {
        "columns": ["name", "id", "workspace_id", "updated_at", "records"],
        "table_name": "Datasets",
        # "len_column": "records" is currently disabled, so "records" is read
        # from the dumped model like any other column.
    },
    "Workspace": {
        "columns": ["name", "id", "datasets", "updated_at"],
        "table_name": "Workspaces",
        "len_column": "datasets",
    },
    "User": {
        "columns": ["username", "id", "role", "updated_at"],
        "table_name": "Users",
    },
}


class ResourceHTMLReprMixin:
    """Mixin that renders a collection of resources as an HTML table.

    Each resource class is looked up by its class name in
    ``RESOURCE_REPR_CONFIG``, which supplies the table title, the columns to
    show and, optionally, a ``len_column`` whose cell holds ``len(resource)``
    instead of a value taken from the dumped model.
    """

    def _resource_to_table_row(self, resource) -> Dict[str, Any]:
        """Build the ``column -> value`` mapping for a single resource.

        Args:
            resource: Object exposing ``_model.model_dump()`` whose class name
                is a key of ``RESOURCE_REPR_CONFIG``.

        Returns:
            The row as an insertion-ordered dict. When a ``len_column`` is
            configured it comes first and holds ``len(resource)``; the other
            configured columns follow with values read from the dumped model.

        Raises:
            KeyError: If the resource class has no config entry, or a
                configured column is missing from the dumped model.
        """
        dumped_resource_model = resource._model.model_dump()
        config = RESOURCE_REPR_CONFIG[resource.__class__.__name__]
        # Read-only lookup: no need to copy the config just to pop a key.
        len_column = config.get("len_column")

        row: Dict[str, Any] = {}
        if len_column is not None:
            row[len_column] = len(resource)
        for column in config["columns"]:
            if column != len_column:
                row[column] = dumped_resource_model[column]
        return row

    def _resource_to_table_name(self, resource) -> str:
        """Return the configured table heading for the resource's class.

        Raises:
            KeyError: If the resource class has no entry in the config.
        """
        return RESOURCE_REPR_CONFIG[resource.__class__.__name__]["table_name"]

    def _represent_as_html(self, resources) -> str:
        """Render ``resources`` as an ``<h3>`` heading followed by a table.

        Args:
            resources: Sequence of resources of the same class. May be empty.

        Returns:
            The HTML markup as a string. Headings, column names and cell
            values are HTML-escaped, so resource data containing ``<``, ``>``
            or ``&`` is displayed literally instead of being interpreted as
            markup.
        """
        if not resources:
            # Nothing to derive the table name from: fall back to the
            # collection's own class name and show an empty table rather than
            # raising IndexError on resources[0].
            return f"<h3>{escape(type(self).__name__)}</h3><table></table>"

        table_name = self._resource_to_table_name(resources[0])
        table_rows = [self._resource_to_table_row(resource) for resource in resources]

        # The first row defines the header; all rows share the same config.
        header_cells = "".join(f"<th>{escape(str(column))}</th>" for column in table_rows[0])
        body_rows = "".join(
            "<tr>" + "".join(f"<td>{escape(str(value))}</td>" for value in row.values()) + "</tr>"
            for row in table_rows
        )
        return f"<h3>{escape(str(table_name))}</h3><table><tr>{header_cells}</tr>{body_rows}</table>"
9 changes: 6 additions & 3 deletions src/argilla_sdk/_models/_settings/_fields.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
from typing import Optional
from uuid import UUID

from pydantic import BaseModel, validator

from typing import Optional
from pydantic import BaseModel, validator, field_serializer

from argilla_sdk._helpers._log import log

Expand Down Expand Up @@ -32,6 +31,10 @@ def __title_default(cls, title, values):
log(f"TextField title is {validated_title}")
return validated_title

@field_serializer("id", when_used="unless-none")
def serialize_id(self, value: UUID) -> str:
    """Serialize the ``id`` field as its string form.

    Registered with ``when_used="unless-none"``, so it is not applied when
    the id is ``None``.
    """
    serialized_id = str(value)
    return serialized_id


class TextFieldModel(FieldBaseModel):
settings: FieldSettings = FieldSettings(type="text", use_markdown=False)
21 changes: 17 additions & 4 deletions src/argilla_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import abstractmethod
from collections.abc import Sequence
from typing import TYPE_CHECKING, overload

import argilla_sdk as rg
from argilla_sdk import _api
from argilla_sdk._helpers._resource_repr import ResourceHTMLReprMixin

if TYPE_CHECKING:
from argilla_sdk import Workspace
Expand Down Expand Up @@ -44,7 +45,7 @@ def me(self) -> "User":
return User(client=self, _model=self.api.users.get_me())


class Users(Sequence):
class Users(Sequence, ResourceHTMLReprMixin):
"""A collection of users. It can be used to create a new user or to get an existing one."""

def __init__(self, client: "Argilla") -> None:
Expand Down Expand Up @@ -78,8 +79,12 @@ def __getitem__(self, index):
def __len__(self) -> int:
    """Return the number of entries in ``self._api.list()``."""
    listed = self._api.list()
    return len(listed)

def _repr_html_(self) -> "HTML":
    """Render every item of this collection as an HTML table for notebook display."""
    total = len(self)
    # Items are fetched by position, exactly as many as ``len(self)`` reports.
    resources = [self[position] for position in range(total)]
    return self._represent_as_html(resources=resources)


class Workspaces(Sequence):
class Workspaces(Sequence, ResourceHTMLReprMixin):
"""A collection of workspaces. It can be used to create a new workspace or to get an existing one."""

def __init__(self, client: "Argilla") -> None:
Expand Down Expand Up @@ -114,8 +119,12 @@ def __getitem__(self, index: int) -> "Workspace":
def __len__(self) -> int:
    """Return the number of entries in ``self._api.list()``."""
    listed = self._api.list()
    return len(listed)

def _repr_html_(self) -> "HTML":
    """Render every item of this collection as an HTML table for notebook display."""
    total = len(self)
    # Items are fetched by position, exactly as many as ``len(self)`` reports.
    resources = [self[position] for position in range(total)]
    return self._represent_as_html(resources=resources)

class Datasets(Sequence):

class Datasets(Sequence, ResourceHTMLReprMixin):
"""A collection of datasets. It can be used to create a new dataset or to get an existing one."""

def __init__(self, client: "Argilla") -> None:
Expand Down Expand Up @@ -150,3 +159,7 @@ def __getitem__(self, index) -> "Dataset":

def __len__(self) -> int:
    """Return the number of entries in ``self._api.list()``."""
    listed = self._api.list()
    return len(listed)

def _repr_html_(self) -> "HTML":
    """Render every item of this collection as an HTML table for notebook display."""
    total = len(self)
    # Items are fetched by position, exactly as many as ``len(self)`` reports.
    resources = [self[position] for position in range(total)]
    return self._represent_as_html(resources=resources)
3 changes: 3 additions & 0 deletions src/argilla_sdk/datasets/_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,9 @@ def __init__(
self.__records = DatasetRecords(client=self._client, dataset=self)
self._sync(model=self._model)

def __len__(self) -> int:
    """Return ``len(self.records)``, i.e. whatever the records container reports."""
    dataset_records = self.records
    return len(dataset_records)

@property
def records(self) -> "DatasetRecords":
    """The ``DatasetRecords`` container created for this dataset in ``__init__``."""
    dataset_records = self.__records
    return dataset_records
Expand Down
5 changes: 5 additions & 0 deletions src/argilla_sdk/records/_dataset_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def __init__(self, client: "Argilla", dataset: "Dataset"):
"""
self.__client = client
self.__dataset = dataset
self.__records = []

def __iter__(self):
    """Return a new ``DatasetRecordsIterator`` built from this dataset and client."""
    records_iterator = DatasetRecordsIterator(self.__dataset, self.__client)
    return records_iterator
Expand All @@ -114,6 +115,9 @@ def __call__(
with_responses=with_responses,
)

def __len__(self) -> int:
    """Return the size of the locally cached ``__records`` list.

    NOTE(review): the cache starts empty and, as far as this diff shows, is
    only replaced by ``add`` with the models of the latest batch, so this may
    not match the number of records stored in the dataset; confirm.
    """
    cached_records = self.__records
    return len(cached_records)

############################
# Public methods
############################
Expand All @@ -136,6 +140,7 @@ def add(
message=f"Added {len(records_models)} records to dataset {self.__dataset.name}",
level="info",
)
self.__records = records_models

def update(
self, records: Union[dict, List[dict]], mapping: Optional[Dict[str, str]] = None, user_id: Optional[UUID] = None
Expand Down
11 changes: 11 additions & 0 deletions src/argilla_sdk/workspaces/_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,21 @@ def __init__(
super().__init__(client=client, api=client.api.workspaces)
self._sync(model=WorkspaceModel(name=name, id=id) if not _model else _model)

def __len__(self) -> int:
    """Return the number of datasets exposed by the ``datasets`` property."""
    workspace_datasets = self.datasets
    return len(workspace_datasets)

def list_datasets(self) -> List["DatasetModel"]:
    """Fetch the datasets of this workspace through the client's datasets API.

    Returns:
        Whatever ``self._client.api.datasets.list(self.id)`` returns; the
        number of datasets found is logged before returning.
    """
    workspace_id = self.id
    found = self._client.api.datasets.list(workspace_id)
    self.log(f"Got {len(found)} datasets for workspace {self.id}")
    return found

def exists(self) -> bool:
    """Return the result of asking the workspace API whether ``self.id`` exists."""
    workspace_id = self.id
    return self._api.exists(workspace_id)

############################
# Properties
############################

@property
def datasets(self) -> List["DatasetModel"]:
    """The workspace's datasets, obtained by calling ``list_datasets()`` on each access."""
    listed = self.list_datasets()
    return listed