feat: create responses and suggestions with spans #4623

Merged

52 commits
d44be46
chore: Add new span enum type
frascuchon Feb 29, 2024
39c9c6e
feat: define new SpanQuestion and SpanLabelOption classes
frascuchon Feb 29, 2024
28a7ffa
feat: Add 'remote' version for span question
frascuchon Feb 29, 2024
febd300
refactor: Using allowed types from questions modules
frascuchon Feb 29, 2024
3b010bb
chore: Expose new span question classes through feedback module
frascuchon Feb 29, 2024
9759153
refactor: simplify adding questions
frascuchon Feb 29, 2024
5819d87
chore: Expose span question classes from rg
frascuchon Feb 29, 2024
d0ffa74
tests: Adding unint tests
frascuchon Feb 29, 2024
9776e2d
tests: Adding basic integration tests
frascuchon Feb 29, 2024
8cafa01
Using feature branch from argilla server
frascuchon Feb 29, 2024
86f568d
update CHANGELOG
frascuchon Feb 29, 2024
490dec6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Feb 29, 2024
56f360b
fix import naming
frascuchon Feb 29, 2024
5b638e7
Merge branch 'feat/create-span-question-from-sdk' of github.com:argil…
frascuchon Feb 29, 2024
f337160
Adding a new test
frascuchon Feb 29, 2024
5317900
fix: Adding label description
frascuchon Mar 1, 2024
35f6392
update tests
frascuchon Mar 1, 2024
ceded58
update tests
frascuchon Mar 1, 2024
424a5c8
Merge branch 'feat/create-span-question-from-sdk' into feat/suggest-r…
frascuchon Mar 1, 2024
11a0068
feat: Suggestions schema module including new SpanSuggestion schema
frascuchon Mar 1, 2024
f4bea43
Remove suggestions schemas from records module
frascuchon Mar 1, 2024
53af61b
update suggestion schemas imports
frascuchon Mar 1, 2024
2760348
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 1, 2024
4edf802
change to_server_payload logic
frascuchon Mar 1, 2024
433f27b
Merge branch 'feat/suggest-records-with-spans' of github.com:argilla-…
frascuchon Mar 1, 2024
540979f
feat: Define responses and responses values modules
frascuchon Mar 4, 2024
0899176
refactor: Align response and suggestion value schemas
frascuchon Mar 4, 2024
be02eb2
refactor: Remove response schemas from records module
frascuchon Mar 4, 2024
22f6947
feat: Adding fields attribute for span question
frascuchon Mar 4, 2024
be13a7d
Review imports
frascuchon Mar 4, 2024
84dd077
refactor: Relax value API model constraints API model
frascuchon Mar 4, 2024
63a369f
tests: Update tests
frascuchon Mar 4, 2024
a1c13e9
Merge branch 'feat/span-questions-support' into feat/create-responses…
frascuchon Mar 4, 2024
5831642
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 4, 2024
b39cbb3
change feature branch for argilla server
frascuchon Mar 5, 2024
2e87977
Merge branch 'bugfix/format-suggestions-for-ranking-values' into feat…
frascuchon Mar 5, 2024
5f14b28
add field to span question definition
frascuchon Mar 5, 2024
ca0b649
Merge branch 'bugfix/format-suggestions-for-ranking-values' into feat…
frascuchon Mar 6, 2024
816a839
Merge branch 'feat/span-questions-support' into feat/create-responses…
frascuchon Mar 6, 2024
6a2f636
Merge branch 'bugfix/hf-dataset-remove-rank-list' into feat/create-re…
frascuchon Mar 6, 2024
fd664f4
Merge branch 'feat/span-questions-support' into feat/create-responses…
frascuchon Mar 6, 2024
7f43202
Merge branch 'feat/span-questions-support' into feat/create-responses…
frascuchon Mar 6, 2024
d0b1351
fix: Using schema instances instead of dict for suggestions
frascuchon Mar 6, 2024
a3def08
using argilla-server feature branch
frascuchon Mar 6, 2024
ae329fc
refactor: creating suggestions and responses (#4627)
frascuchon Mar 6, 2024
5b65e11
chore. Update CHANGELOG
frascuchon Mar 6, 2024
af035dc
adding more tests
frascuchon Mar 6, 2024
bb7dba4
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 6, 2024
b6497ae
feat: Accept a dict for labels
frascuchon Mar 7, 2024
9439624
feat: export import hf dataset with spans (#4636)
frascuchon Mar 7, 2024
c86b20d
fix: add manual validation for min_items
frascuchon Mar 8, 2024
6136414
update tests
frascuchon Mar 8, 2024
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -19,6 +19,7 @@ These are the section headers that we use:
### Added

- Added support for span questions in the Python SDK. ([#4617](https://github.com/argilla-io/argilla/pull/4617))
- Added support for spans values in suggestions and responses. ([#4623](https://github.com/argilla-io/argilla/pull/4623))

### Fixed

50 changes: 45 additions & 5 deletions src/argilla/client/feedback/integrations/huggingface/dataset.py
@@ -11,11 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging
import tempfile
import warnings
from copy import copy
from typing import TYPE_CHECKING, Any, Optional, Type, Union

from packaging.version import parse as parse_version
@@ -50,7 +50,7 @@ def _huggingface_format(dataset: Union["FeedbackDataset", "RemoteFeedbackDataset"
questions, and metadata_properties formatted as `datasets.Features`.

Examples:
>>> from argilla.client.feedback.integrations.dataset import HuggingFaceDatasetMixin
>>> from argilla.client.feedback.integrations.huggingface import HuggingFaceDatasetMixin
>>> dataset = FeedbackDataset(...) or RemoteFeedbackDataset(...)
>>> huggingface_dataset = HuggingFaceDatasetMixin._huggingface_format(dataset)
"""
@@ -71,17 +71,38 @@ def _huggingface_format(dataset: Union["FeedbackDataset", "RemoteFeedbackDataset"
for question in dataset.questions:
if question.type in [QuestionTypes.text, QuestionTypes.label_selection]:
value = Value(dtype="string", id="question")
suggestion_value = copy(value)
elif question.type == QuestionTypes.rating:
value = Value(dtype="int32", id="question")
suggestion_value = copy(value)
elif question.type == QuestionTypes.ranking:
value = Sequence({"rank": Value(dtype="uint8"), "value": Value(dtype="string")}, id="question")
suggestion_value = copy(value)
elif question.type in QuestionTypes.multi_label_selection:
value = Sequence(Value(dtype="string"), id="question")
suggestion_value = copy(value)
elif question.type in QuestionTypes.span:
value = Sequence(
{
"start": Value(dtype="int32"),
"end": Value(dtype="int32"),
"label": Value(dtype="string"),
},
id="question",
)
suggestion_value = Sequence(
{
"start": Value(dtype="int32"),
"end": Value(dtype="int32"),
"label": Value(dtype="string"),
"score": Value(dtype="float32"),
}
)
else:
raise ValueError(
f"Question {question.name} is of type `{question.type}`,"
" for the moment only the following question types are supported:"
f" `{'`, `'.join([arg.value for arg in QuestionTypes])}`."
f" `{'`, `'.join(QuestionTypes.values())}`."
)

hf_features[question.name] = [
@@ -94,8 +115,8 @@ def _huggingface_format(dataset: Union["FeedbackDataset", "RemoteFeedbackDataset"
if question.name not in hf_dataset:
hf_dataset[question.name] = []

value.id = "suggestion"
hf_features[f"{question.name}-suggestion"] = value
suggestion_value.id = "suggestion"
hf_features[f"{question.name}-suggestion"] = suggestion_value
if f"{question.name}-suggestion" not in hf_dataset:
hf_dataset[f"{question.name}-suggestion"] = []

@@ -138,6 +159,15 @@ def _huggingface_format(dataset: Union["FeedbackDataset", "RemoteFeedbackDataset"
}
if question.type == QuestionTypes.ranking:
value = [r.dict() for r in response.values[question.name].value]
elif question.type == QuestionTypes.span:
value = [
{
"start": span.start,
"end": span.end,
"label": span.label,
}
for span in response.values[question.name].value
]
else:
value = response.values[question.name].value
formatted_response["value"] = value
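The span branch above flattens each span value into a plain dict so it matches the declared `datasets` feature. A minimal standalone sketch of that conversion (the `Span` dataclass here is a hypothetical stand-in for the SDK's span value schema, not the actual class):

```python
from dataclasses import dataclass
from typing import List


@dataclass
class Span:
    # Stand-in for the SDK's span value schema: character offsets plus a label.
    start: int
    end: int
    label: str


def spans_to_rows(spans: List[Span]) -> List[dict]:
    # Mirrors the list comprehension in the diff: one plain dict per span,
    # matching the Sequence({"start", "end", "label"}) feature declared earlier.
    return [{"start": s.start, "end": s.end, "label": s.label} for s in spans]


rows = spans_to_rows([Span(0, 5, "person"), Span(10, 13, "org")])
print(rows)
# → [{'start': 0, 'end': 5, 'label': 'person'}, {'start': 10, 'end': 13, 'label': 'org'}]
```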
@@ -421,6 +451,11 @@ def from_huggingface(
if value is not None:
if question.type == QuestionTypes.ranking:
value = [{"rank": r, "value": v} for r, v in zip(value["rank"], value["value"])]
elif question.type == QuestionTypes.span:
value = [
{"start": s, "end": e, "label": l}
for s, e, l in zip(value["start"], value["end"], value["label"])

[Review comment] In terms of human-readability, I would also like to add the extracted text, something like value["text"][value["start"]:value["end"]], but perhaps this is difficult to map back into the correct format when calling from_huggingface?

[Author reply] This change is not hard to add, if it makes sense.

]
responses[user_id or "user_without_id"]["values"].update({question.name: {"value": value}})

[Review comment] Perhaps we should add else statements here to be sure it raises errors or has defined behaviour when we add new question types?

[Author reply] I'm using the current approach to support span value mappings. I wouldn't add a cross-cutting solution in the current version of the SDK; this is something we can add in the new SDK implementation.
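The `zip` in the span branch above undoes the columnar layout: `datasets` stores a `Sequence` of struct features as parallel `start`/`end`/`label` lists, so on load each span question must be stitched back into one dict per span. A standalone sketch of that reconstruction:

```python
from typing import Dict, List


def columnar_to_spans(value: Dict[str, list]) -> List[dict]:
    # Hugging Face datasets returns {"start": [...], "end": [...], "label": [...]};
    # rebuild one dict per span, as from_huggingface does in the diff above.
    return [
        {"start": s, "end": e, "label": l}
        for s, e, l in zip(value["start"], value["end"], value["label"])
    ]


spans = columnar_to_spans({"start": [0, 10], "end": [5, 13], "label": ["person", "org"]})
print(spans)
# → [{'start': 0, 'end': 5, 'label': 'person'}, {'start': 10, 'end': 13, 'label': 'org'}]
```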


# First if-condition is here for backwards compatibility
@@ -431,6 +466,11 @@
value = hfds[index][f"{question.name}-suggestion"]
if question.type == QuestionTypes.ranking:
value = [{"rank": r, "value": v} for r, v in zip(value["rank"], value["value"])]
elif question.type == QuestionTypes.span:
value = [
{"start": s, "end": e, "label": l}
for s, e, l in zip(value["start"], value["end"], value["label"])


]



suggestion = {"question_name": question.name, "value": value}
if hfds[index][f"{question.name}-suggestion-metadata"] is not None:
2 changes: 1 addition & 1 deletion src/argilla/client/feedback/metrics/utils.py
@@ -191,7 +191,7 @@ def get_unified_responses_and_suggestions(
unified_responses = [
tuple(ranking_schema.rank for ranking_schema in response) for response in unified_responses
]
suggestions = [tuple(s["rank"] for s in suggestion) for suggestion in suggestions]
suggestions = [tuple(s.rank for s in suggestion) for suggestion in suggestions]

return unified_responses, suggestions
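The one-line change above follows from suggestions now carrying schema instances rather than raw dicts (see the "Using schema instances instead of dict for suggestions" commit), so ranks are read with attribute access. A standalone sketch with a hypothetical `RankingValue` stand-in for the SDK's ranking schema:

```python
from dataclasses import dataclass


@dataclass
class RankingValue:
    # Stand-in for the SDK's ranking value schema.
    value: str
    rank: int


# Each suggestion holds a list of ranking values; extract one rank tuple per
# suggestion, as the updated metrics helper does with `s.rank`.
suggestions = [[RankingValue("a", 1), RankingValue("b", 2)]]
ranks = [tuple(s.rank for s in suggestion) for suggestion in suggestions]
print(ranks)
# → [(1, 2)]
```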

17 changes: 8 additions & 9 deletions src/argilla/client/feedback/schemas/__init__.py
@@ -37,14 +37,10 @@
SpanQuestion,
TextQuestion,
)
from argilla.client.feedback.schemas.records import (
FeedbackRecord,
RankingValueSchema,
ResponseSchema,
SortBy,
SuggestionSchema,
ValueSchema,
)
from argilla.client.feedback.schemas.records import FeedbackRecord, SortBy
from argilla.client.feedback.schemas.response_values import RankingValueSchema, ResponseValue, SpanValueSchema
from argilla.client.feedback.schemas.responses import ResponseSchema, ResponseStatus, ValueSchema
from argilla.client.feedback.schemas.suggestions import SuggestionSchema
from argilla.client.feedback.schemas.vector_settings import VectorSettings

__all__ = [
@@ -67,10 +63,13 @@
"SpanQuestion",
"SpanLabelOption",
"FeedbackRecord",
"RankingValueSchema",
"ResponseSchema",
"ResponseValue",
"ResponseStatus",
"SuggestionSchema",
"ValueSchema",
"RankingValueSchema",
"SpanValueSchema",
"SortOrder",
"SortBy",
"RecordSortField",
31 changes: 29 additions & 2 deletions src/argilla/client/feedback/schemas/questions.py
@@ -17,6 +17,9 @@
from typing import Any, Dict, List, Literal, Optional, Union

from argilla.client.feedback.schemas.enums import QuestionTypes
from argilla.client.feedback.schemas.response_values import parse_value_response_for_question
from argilla.client.feedback.schemas.responses import ResponseValue, ValueSchema
from argilla.client.feedback.schemas.suggestions import SuggestionSchema
from argilla.client.feedback.schemas.utils import LabelMappingMixin
from argilla.client.feedback.schemas.validators import title_must_have_value
from argilla.pydantic_v1 import BaseModel, Extra, Field, conint, conlist, root_validator, validator
@@ -77,6 +80,16 @@ def to_server_payload(self) -> Dict[str, Any]:
"settings": self.server_settings,
}

def suggestion(self, value: ResponseValue, **kwargs) -> SuggestionSchema:
"""Method that will be used to create a `SuggestionSchema` from the question and a suggested value."""
value = parse_value_response_for_question(self, value)
return SuggestionSchema(question_name=self.name, value=value, **kwargs)

def response(self, value: ResponseValue) -> Dict[str, ValueSchema]:
"""Method that will be used to create a response from the question and a value."""
value = parse_value_response_for_question(self, value)
return {self.name: ValueSchema(value=value)}
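The two helpers above let a question build its own suggestion and response payloads after the value has been validated for that question type. A simplified standalone mimic of the pattern (not the actual SDK classes; `_parse_value` stands in for `parse_value_response_for_question` and just passes values through):

```python
class Question:
    def __init__(self, name: str):
        self.name = name

    def _parse_value(self, value):
        # Placeholder for parse_value_response_for_question, which in the SDK
        # validates and normalizes the value against the question type.
        return value

    def suggestion(self, value, **kwargs) -> dict:
        # Extra kwargs (agent, score, ...) flow into the suggestion payload.
        return {"question_name": self.name, "value": self._parse_value(value), **kwargs}

    def response(self, value) -> dict:
        # Responses are keyed by question name, wrapping the value.
        return {self.name: {"value": self._parse_value(value)}}


q = Question("entities")
print(q.suggestion([{"start": 0, "end": 5, "label": "person"}], agent="ner-model"))
```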


class TextQuestion(QuestionSchema):
"""Schema for the `FeedbackDataset` text questions, which are the ones that will
@@ -334,21 +347,35 @@ class SpanQuestion(QuestionSchema):

Examples:
>>> from argilla.client.feedback.schemas.questions import SpanQuestion
>>> SpanQuestion(name="span_question", title="Span Question", labels=["person", "org"])
>>> SpanQuestion(name="span_question", field="prompt", title="Span Question", labels=["person", "org"])
"""

type: Literal[QuestionTypes.span] = Field(QuestionTypes.span, allow_mutation=False, const=True)

labels: conlist(Union[str, SpanLabelOption], min_items=1, unique_items=True)
field: str = Field(..., description="The field in the input that the user will be asked to annotate.")
labels: Union[Dict[str, str], conlist(Union[str, SpanLabelOption], min_items=1, unique_items=True)]

@validator("labels", pre=True)
def parse_labels_dict(cls, labels) -> List[SpanLabelOption]:
if isinstance(labels, dict):
return [SpanLabelOption(value=label, text=text) for label, text in labels.items()]
return labels

@validator("labels", always=True)
def normalize_labels(cls, v: List[Union[str, SpanLabelOption]]) -> List[SpanLabelOption]:
return [SpanLabelOption(value=label, text=label) if isinstance(label, str) else label for label in v]

@validator("labels")

[Review comment] Wasn't there a max for the labels too, which we defined on the server side? Perhaps we can use it here as well?

[Author reply, @frascuchon, Mar 11, 2024] Yes, this is not hard to implement.

Anyway, this is not the current behaviour for single- and multi-label settings: in both cases there is a min validation but not a max one. Also, if we plan to support a configurable value for this, adding a hard validation here may introduce workflow problems, so I will leave it as is.

def labels_must_be_valid(cls, labels: List[SpanLabelOption]) -> List[SpanLabelOption]:
# This validator is needed since the conlist constraint does not work.
assert len(labels) > 0, "At least one label must be provided"
return labels
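Together, the validators above accept labels as a dict (value to display text), a list of strings, or a list of `SpanLabelOption`s, and normalize everything to options, with a manual non-empty check because the `conlist` constraint does not fire for the dict branch. A standalone sketch of that normalization (the dataclass is a stand-in for the SDK's `SpanLabelOption` model):

```python
from dataclasses import dataclass
from typing import Dict, List, Union


@dataclass
class SpanLabelOption:
    value: str
    text: str


def normalize_labels(
    labels: Union[Dict[str, str], List[Union[str, "SpanLabelOption"]]]
) -> List[SpanLabelOption]:
    # Dict form: keys are label values, values are display texts.
    if isinstance(labels, dict):
        labels = [SpanLabelOption(value=v, text=t) for v, t in labels.items()]
    # Bare strings become options whose display text equals the value.
    labels = [SpanLabelOption(value=l, text=l) if isinstance(l, str) else l for l in labels]
    # Manual min-items check, mirroring labels_must_be_valid in the diff.
    if not labels:
        raise ValueError("At least one label must be provided")
    return labels


print(normalize_labels({"PER": "Person", "ORG": "Organization"}))
```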

@property
def server_settings(self) -> Dict[str, Any]:
return {
"type": self.type,
"field": self.field,
"options": [label.dict() for label in self.labels],
}

128 changes: 10 additions & 118 deletions src/argilla/client/feedback/schemas/records.py
@@ -13,130 +13,21 @@
# limitations under the License.

import warnings
from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
from uuid import UUID

from argilla.client.feedback.schemas.enums import RecordSortField, ResponseStatus, SortOrder
from argilla.pydantic_v1 import BaseModel, Extra, Field, PrivateAttr, StrictInt, StrictStr, conint, validator
from argilla.client.feedback.schemas.enums import RecordSortField, SortOrder

# Support backward compatibility for import of RankingValueSchema from records module
from argilla.client.feedback.schemas.response_values import RankingValueSchema # noqa
from argilla.client.feedback.schemas.responses import ResponseSchema, ValueSchema # noqa
from argilla.client.feedback.schemas.suggestions import SuggestionSchema
from argilla.pydantic_v1 import BaseModel, Extra, Field, PrivateAttr, validator

if TYPE_CHECKING:
from argilla.client.feedback.unification import UnifiedValueSchema


class RankingValueSchema(BaseModel):
"""Schema for the `RankingQuestion` response value for a `RankingQuestion`. Note that
we may have more than one record in the same rank.

Args:
value: The value of the record.
rank: The rank of the record.
"""

value: StrictStr
rank: Optional[conint(ge=1)] = None


class ValueSchema(BaseModel):
"""Schema for any `FeedbackRecord` response value.

Args:
value: The value of the record.
"""

value: Union[StrictStr, StrictInt, List[str], List[RankingValueSchema]]


class ResponseSchema(BaseModel):
"""Schema for the `FeedbackRecord` response.

Args:
user_id: ID of the user that provided the response. Defaults to None, and is
automatically fulfilled internally once the question is pushed to Argilla.
values: Values of the response, should match the questions in the record.
status: Status of the response. Defaults to `submitted`.

Examples:
>>> from argilla.client.feedback.schemas.records import ResponseSchema
>>> ResponseSchema(
... values={
... "question_1": {"value": "answer_1"},
... "question_2": {"value": "answer_2"},
... }
... )
"""

user_id: Optional[UUID] = None
values: Union[Dict[str, ValueSchema], None]
status: ResponseStatus = ResponseStatus.submitted

class Config:
extra = Extra.forbid
validate_assignment = True

@validator("user_id", always=True)
def user_id_must_have_value(cls, v):
if not v:
warnings.warn(
"`user_id` not provided, so it will be set to `None`. Which is not an"
" issue, unless you're planning to log the response in Argilla, as"
" it will be automatically set to the active `user_id`.",
)
return v

def to_server_payload(self) -> Dict[str, Any]:
"""Method that will be used to create the payload that will be sent to Argilla
to create a `ResponseSchema` for a `FeedbackRecord`."""
return {
# UUID is not json serializable!!!
"user_id": self.user_id,
"values": {question_name: value.dict() for question_name, value in self.values.items()}
if self.values is not None
else None,
"status": self.status.value if hasattr(self.status, "value") else self.status,
}


class SuggestionSchema(BaseModel):
"""Schema for the suggestions for the questions related to the record.

Args:
question_name: name of the question in the `FeedbackDataset`.
type: type of the question. Defaults to None. Possible values are `model` or `human`.
score: score of the suggestion. Defaults to None.
value: value of the suggestion, which should match the type of the question.
agent: agent that generated the suggestion. Defaults to None.

Examples:
>>> from argilla.client.feedback.schemas.records import SuggestionSchema
>>> SuggestionSchema(
... question_name="question-1",
... type="model",
... score=0.9,
... value="This is the first suggestion",
... agent="agent-1",
... )
"""

question_name: str
type: Optional[Literal["model", "human"]] = None
score: Optional[float] = None
value: Any
agent: Optional[str] = None

class Config:
extra = Extra.forbid
validate_assignment = True

def to_server_payload(self, question_name_to_id: Dict[str, UUID]) -> Dict[str, Any]:
"""Method that will be used to create the payload that will be sent to Argilla
to create a `SuggestionSchema` for a `FeedbackRecord`."""
# We can do this because there is no default values for the fields
payload = self.dict(exclude_unset=True, include={"type", "score", "value", "agent"})
payload["question_id"] = str(question_name_to_id[self.question_name])

return payload


class FeedbackRecord(BaseModel):
"""Schema for the records of a `FeedbackDataset`.

@@ -159,7 +50,7 @@ class FeedbackRecord(BaseModel):
Defaults to None.

Examples:
>>> from argilla.client.feedback.schemas.records import FeedbackRecord, ResponseSchema, SuggestionSchema, ValueSchema
>>> from argilla.feedback import FeedbackRecord, ResponseSchema, SuggestionSchema, ValueSchema
>>> FeedbackRecord(
... fields={"text": "This is the first record", "label": "positive"},
... metadata={"first": True, "nested": {"more": "stuff"}},
@@ -181,6 +72,7 @@
... value="This is the first suggestion",
... agent="agent-1",
... ),
... ],
... external_id="entry-1",
... )
