This repository has been archived by the owner on Oct 2, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature to export dataset records as generic python structures (#49)
* test: add integration tests for updating records * feat: add basic update logic to records api * feat: implement update filter logic in dataset records * fix: excess import in suggestion model * feat: add from model class method to reponse resource * feat: implement suggestion as complete resource with from_model method and properties * feat: implement Record as resource with containers * refactor: restructure records in submodules * refactor: use RecordModel and ResponseModel in record api * test: update tests to use fields and suggestions attributes * feat: implement fields and suggestions attributes within RecordFields and RecordSuggestions * chore: expand record ingestion function with logging and validation * fix: catch missing datasets in erroneous suggestions * chore: document and refactor record resource * chore: refactor and delete submodule records.utils * chore: delete redundant questions module * chore: liscencing in records module * test: revise test for new responses as attribute model * feat: implement sorter in ingestion function to make either response or suggestion * feat: add flats for as_sugestion to DatasetRecords specify suggestion or responses coming in * feat: add core properties to response resource * chore: log dataset name in add and update records * chore: reduce logging every record * chore: naming and formatting in Record * fix: question_name typing in suggestion * chore: delete excess dataset_records code from dataset module * chore: tidy excess imports in record model * test: add testing for export records to generic python structures * feat: implement export mixin for generic python structures * feat: integrate export mixin with dataset records * feat: expose record and metadata property * feat: add serialize to response resource * feat: add serialize method to suggestion resource * docs: add documentation to _generic export mixin * test: update tests for nested export configuration * feat: add to_dict methods in responses and 
suggestions * refactor: refactor generic export for simplicity and to_dict methods * feat: implement to_dict method for resourcer * feat: move to_dict out of export module
- Loading branch information
1 parent
7695a68
commit 6ce7709
Showing
15 changed files
with
439 additions
and
186 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from argilla_sdk.records._export._generic import GenericExportMixin # noqa |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
# Copyright 2024-present, Argilla, Inc. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
from typing import Any, Dict, List, TYPE_CHECKING, Union | ||
from collections import defaultdict | ||
|
||
if TYPE_CHECKING: | ||
from argilla_sdk import Record | ||
|
||
|
||
class GenericExportMixin:
    """Mixin for DatasetRecords and Export classes.

    Provides helpers that turn ``Record`` objects into generic python
    structures (dictionaries and lists of dictionaries).
    """

    def _export_to_dict(
        self, records: List["Record"], flatten=True, orient="names"
    ) -> Dict[str, Union[str, float, int, list]]:
        """Export records to a dictionary keyed by names or by record external_id.

        Args:
            records (List[Record]): List of Record objects to export.
            flatten (bool): The structure of each exported record.
                - True: The record fields, metadata, suggestions and responses will be flattened.
                - False: The record fields, metadata, suggestions and responses will be nested.
            orient (str): The orientation of the exported dictionary.
                - "names": The keys are the names found in the exported records and every value
                    is a list with exactly one entry per record, in the order of `records`.
                    A record that has no value for a given name contributes `None`, so all
                    lists stay aligned row by row.
                - "index": The keys are the external_id of the records and every value is the
                    dictionary exported for that record.
        Returns:
            dataset_records (Dict[str, Union[str, float, int, list]]): The exported records as a plain dict.
        Raises:
            ValueError: If `orient` is neither "names" nor "index".
        """
        if orient not in ("names", "index"):
            raise ValueError(f"Invalid value for orient parameter: {orient}")

        if orient == "index":
            return {
                record.external_id: self.__record_to_dict(record=record, flatten=flatten) for record in records
            }

        rows = [self.__record_to_dict(record=record, flatten=flatten) for record in records]
        # Flattened keys (e.g. `label.response.<user_id>`) differ between records, so the
        # column names are gathered over all rows first, keeping first-appearance order.
        column_names = dict.fromkeys(key for row in rows for key in row)
        # `row.get` pads missing values with None so every column has len(records) entries.
        return {name: [row.get(name) for row in rows] for name in column_names}

    def _export_to_list(self, records: List["Record"], flatten=True) -> List[Dict[str, Union[str, float, int, list]]]:
        """Export records to a list with one dictionary per record.

        Args:
            records (List[Record]): List of Record objects to export.
            flatten (bool): The structure of each exported dictionary.
                - True: The record fields, metadata, suggestions and responses will be flattened.
                - False: The record fields, metadata, suggestions and responses will be nested.
        Returns:
            dataset_records (List[Dict[str, Union[str, float, int, list]]]): The exported records,
                in the same order as `records`.
        """
        return [self.__record_to_dict(record=record, flatten=flatten) for record in records]

    def __record_to_dict(self, record: "Record", flatten=True) -> Dict[str, Any]:
        """Converts a Record object to a dictionary for export.

        Args:
            record (Record): The Record object to convert. Only its `to_dict()` method is used.
            flatten (bool): The structure of the exported dictionary.
                - True: The record fields, metadata, suggestions and responses will be flattened
                    so that their keys becomes the keys of the record dictionary, using
                    dot notation for nested keys. i.e. `label.suggestion` and `label.response`
                - False: The record fields, metadata, suggestions and responses will be nested as
                    dictionaries within the record dictionary. i.e. `label: {suggestion: ..., response: ...}`
        Returns:
            A dictionary representing the record.
        Raises:
            KeyError: If a suggestion has no "value" key, or a response has no "value" or
                "user_id" key, while flattening.
        """
        # Work on a shallow copy: the flattening below removes keys, and the mapping returned
        # by `to_dict()` may be shared with the record (not visible from this module).
        record_dict = dict(record.to_dict())
        if not flatten:
            return record_dict

        # A section that is absent or None is treated as empty instead of failing the export.
        responses: dict = record_dict.pop("responses", None) or {}
        suggestions: dict = record_dict.pop("suggestions", None) or {}
        fields: dict = record_dict.pop("fields", None) or {}
        metadata: dict = record_dict.pop("metadata", None) or {}
        record_dict.update(fields)
        record_dict.update(metadata)

        # Ordered de-duplication (rather than a set) keeps the key order of the exported
        # dictionary stable between interpreter runs.
        question_names = dict.fromkeys([*suggestions, *responses])
        for question_name in question_names:
            _suggestion: Union[Dict, None] = suggestions.get(question_name)
            if _suggestion:
                _suggestion = dict(_suggestion)  # copy so the source suggestion keeps its keys
                record_dict[f"{question_name}.suggestion"] = _suggestion.pop("value")
                record_dict.update(
                    {f"{question_name}.suggestion.{key}": value for key, value in _suggestion.items()}
                )
            for _response in responses.get(question_name) or []:
                _response = dict(_response)  # copy so the source response keeps its keys
                user_id = _response.pop("user_id")
                record_dict[f"{question_name}.response.{user_id}"] = _response.pop("value")
                record_dict.update(
                    {f"{question_name}.response.{user_id}.{key}": value for key, value in _response.items()}
                )
        return record_dict
Oops, something went wrong.