From e8ec4b31497d665af3e5d3e40953f34775b38d42 Mon Sep 17 00:00:00 2001 From: john Date: Mon, 16 Sep 2024 16:01:45 -0400 Subject: [PATCH 1/3] Question budget is back! --- edsl/jobs/interviews/Interview.py | 18 ++- edsl/questions/QuestionBase.py | 9 +- edsl/questions/QuestionBudget.py | 128 ++++++++++++------ edsl/questions/templates/budget/__init__.py | 0 .../budget/answering_instructions.jinja | 5 + .../budget/question_presentation.jinja | 7 + 6 files changed, 118 insertions(+), 49 deletions(-) create mode 100644 edsl/questions/templates/budget/__init__.py create mode 100644 edsl/questions/templates/budget/answering_instructions.jinja create mode 100644 edsl/questions/templates/budget/question_presentation.jinja diff --git a/edsl/jobs/interviews/Interview.py b/edsl/jobs/interviews/Interview.py index 5d8f7aa1..ce5a407d 100644 --- a/edsl/jobs/interviews/Interview.py +++ b/edsl/jobs/interviews/Interview.py @@ -268,6 +268,8 @@ async def _answer_question_and_record_task( ) -> "AgentResponseDict": """Answer a question and records the task.""" + had_language_model_no_response_error = False + @retry( stop=stop_after_attempt(EDSL_MAX_ATTEMPTS), wait=wait_exponential( @@ -306,6 +308,7 @@ async def attempt_answer(): except asyncio.TimeoutError as e: self._handle_exception(e, invigilator, task) + had_language_model_no_response_error = True raise LanguageModelNoResponseError( f"Language model timed out for question '{question.question_name}.'" ) @@ -314,15 +317,16 @@ async def attempt_answer(): self._handle_exception(e, invigilator, task) if "response" not in locals(): + had_language_model_no_response_error = True raise LanguageModelNoResponseError( f"Language model did not return a response for question '{question.question_name}.'" ) - # it got fixed! 
- if question.question_name in self.exceptions: + + # if it gets here, it means the no response error was fixed + if question.question_name in self.exceptions and had_language_model_no_response_error: self.exceptions.record_fixed_question(question.question_name) - # breakpoint() - + return response try: @@ -375,6 +379,8 @@ def _handle_exception( ): import copy + #breakpoint() + answers = copy.copy(self.answers) exception_entry = InterviewExceptionEntry( exception=e, @@ -385,6 +391,10 @@ def _handle_exception( task.task_status = TaskStatus.FAILED self.exceptions.add(invigilator.question.question_name, exception_entry) + if self.raise_validation_errors: + if isinstance(e, QuestionAnswerValidationError): + raise e + if hasattr(self, "stop_on_exception"): stop_on_exception = self.stop_on_exception else: diff --git a/edsl/questions/QuestionBase.py b/edsl/questions/QuestionBase.py index b0ed4c9b..f03a5aad 100644 --- a/edsl/questions/QuestionBase.py +++ b/edsl/questions/QuestionBase.py @@ -482,10 +482,13 @@ def html( if scenario is None: scenario = {} + prior_answers_dict = {} - for key, value in answers.items(): - if not key.endswith("_comment") and not key.endswith("_generated_tokens"): - prior_answers_dict[key] = {"answer": value} + + if isinstance(answers, dict): + for key, value in answers.items(): + if not key.endswith("_comment") and not key.endswith("_generated_tokens"): + prior_answers_dict[key] = {"answer": value} # breakpoint() diff --git a/edsl/questions/QuestionBudget.py b/edsl/questions/QuestionBudget.py index 9d7da078..3ac3daa7 100644 --- a/edsl/questions/QuestionBudget.py +++ b/edsl/questions/QuestionBudget.py @@ -1,9 +1,59 @@ from __future__ import annotations import random -from typing import Any, Optional, Union +from typing import Any, Optional, Union, List + + +from pydantic import Field, BaseModel, validator + from edsl.questions.QuestionBase import QuestionBase from edsl.questions.descriptors import IntegerDescriptor, QuestionOptionsDescriptor +from 
edsl.questions.ResponseValidatorABC import ResponseValidatorABC + +class BudgewResponseValidator(ResponseValidatorABC): + valid_examples = [] + + invalid_examples = [] + + def fix(self, response, verbose=False): + if verbose: + print(f"Fixing list response: {response}") + answer = str(response.get("answer") or response.get("generated_tokens", "")) + if len(answer.split(",")) > 0: + return ( + {"answer": answer.split(",")} | {"comment": response.get("comment")} + if "comment" in response + else {} + ) + +def create_budget_model(budget_sum: float, permissive: bool, question_options: List[str]): + class BudgetResponse(BaseModel): + answer: List[float] = Field( + ..., + description="List of non-negative numbers representing budget allocation", + min_items=len(question_options), + max_items=len(question_options) + ) + comment: Optional[str] = None + generated_tokens: Optional[str] = None + + @validator('answer') + def validate_answer(cls, v): + if len(v) != len(question_options): + raise ValueError(f"Must provide {len(question_options)} values") + if any(x < 0 for x in v): + raise ValueError("All values must be non-negative") + total = sum(v) + if not permissive and total != budget_sum: + raise ValueError(f"Sum of numbers must equal {budget_sum}") + elif permissive and total > budget_sum: + raise ValueError(f"Sum of numbers cannot exceed {budget_sum}") + return v + + class Config: + extra = 'forbid' + + return BudgetResponse class QuestionBudget(QuestionBase): """This question prompts the agent to allocate a budget among options.""" @@ -12,7 +62,7 @@ class QuestionBudget(QuestionBase): budget_sum: int = IntegerDescriptor(none_allowed=False) question_options: list[str] = QuestionOptionsDescriptor(q_budget=True) _response_model = None - response_validator_class = None + response_validator_class = BudgewResponseValidator def __init__( self, @@ -20,8 +70,10 @@ def __init__( question_text: str, question_options: list[str], budget_sum: int, + include_comment: bool = True, 
question_presentation: Optional[str] = None, answering_instructions: Optional[str] = None, + permissive: bool = False, ): """Instantiate a new QuestionBudget. @@ -36,20 +88,13 @@ def __init__( self.budget_sum = budget_sum self.question_presentation = question_presentation self.answering_instructions = answering_instructions + self.permissive = permissive + self.include_comment = include_comment - ################ - # Answer methods - ################ - def _validate_answer(self, answer: dict[str, Any]) -> dict[str, Union[int, str]]: - """Validate the answer.""" - self._validate_answer_template_basic(answer) - self._validate_answer_key_value(answer, "answer", dict) - self._validate_answer_budget(answer) - return answer - - def _translate_answer_code_to_answer( - self, answer_codes: dict[str, int], scenario: "Scenario" = None - ): + def create_response_model(self): + return create_budget_model(self.budget_sum, self.permissive, self.question_options) + + def _translate_answer_code_to_answer(self, answer_code, combined_dict) -> list[dict]: """ Translate the answer codes to the actual answers. @@ -58,35 +103,35 @@ def _translate_answer_code_to_answer( This code will translate that to "a". 
""" translated_codes = [] - for answer_code, response in answer_codes.items(): - translated_codes.append({self.question_options[int(answer_code)]: response}) + for answer_code, question_option in zip(answer_code, self.question_options): + translated_codes.append({question_option: answer_code}) return translated_codes - def _simulate_answer(self, human_readable=True): - """Simulate a valid answer for debugging purposes (what the validator expects).""" - from edsl.utilities.utilities import random_string - - if human_readable: - keys = self.question_options - else: - keys = range(len(self.question_options)) - remaining_budget = self.budget_sum - values = [] - for _ in range(len(self.question_options)): - if _ == len(self.question_options) - 1: - # Assign remaining budget to the last value - values.append(remaining_budget) - else: - # Generate a random value between 0 and remaining budget - value = random.randint(0, remaining_budget) - values.append(value) - remaining_budget -= value - answer = dict(zip(keys, values)) - return { - "answer": answer, - "comment": random_string(), - } + # def _simulate_answer(self, human_readable=True): + # """Simulate a valid answer for debugging purposes (what the validator expects).""" + # from edsl.utilities.utilities import random_string + + # if human_readable: + # keys = self.question_options + # else: + # keys = range(len(self.question_options)) + # remaining_budget = self.budget_sum + # values = [] + # for _ in range(len(self.question_options)): + # if _ == len(self.question_options) - 1: + # # Assign remaining budget to the last value + # values.append(remaining_budget) + # else: + # # Generate a random value between 0 and remaining budget + # value = random.randint(0, remaining_budget) + # values.append(value) + # remaining_budget -= value + # answer = dict(zip(keys, values)) + # return { + # "answer": answer, + # "comment": random_string(), + # } @property def question_html_content(self) -> str: @@ -171,5 +216,4 @@ def 
main(): # results = q.run() import doctest - doctest.testmod(optionflags=doctest.ELLIPSIS) diff --git a/edsl/questions/templates/budget/__init__.py b/edsl/questions/templates/budget/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/edsl/questions/templates/budget/answering_instructions.jinja b/edsl/questions/templates/budget/answering_instructions.jinja new file mode 100644 index 00000000..b248847e --- /dev/null +++ b/edsl/questions/templates/budget/answering_instructions.jinja @@ -0,0 +1,5 @@ +Return only a comma-separated list the values in the same order as the options, with 0s included, on one line, in square braces. + +Example: if there are 4 options, the response should be "[25,25,25,25]" to allocate 25 to each option. + +On the next line, return a text comment on your choice. diff --git a/edsl/questions/templates/budget/question_presentation.jinja b/edsl/questions/templates/budget/question_presentation.jinja new file mode 100644 index 00000000..05a4d4f9 --- /dev/null +++ b/edsl/questions/templates/budget/question_presentation.jinja @@ -0,0 +1,7 @@ +{{question_text}} +The options are +{% for option in question_options %} +{{ loop.index0 }}: {{option}} +{% endfor %} +Allocate your budget of {{budget_sum}} among the options. 
+ From 8544914010cac431ee32829000c9f7a9927624c1 Mon Sep 17 00:00:00 2001 From: John Horton Date: Mon, 16 Sep 2024 21:47:09 -0400 Subject: [PATCH 2/3] Updates test for new budget format --- edsl/jobs/interviews/Interview.py | 12 +++++--- edsl/jobs/tasks/TaskHistory.py | 2 +- edsl/questions/QuestionBudget.py | 25 ++++++++++++----- tests/questions/test_QuestionBudget.py | 39 +++++++++++++------------- 4 files changed, 47 insertions(+), 31 deletions(-) diff --git a/edsl/jobs/interviews/Interview.py b/edsl/jobs/interviews/Interview.py index ce5a407d..aaed9008 100644 --- a/edsl/jobs/interviews/Interview.py +++ b/edsl/jobs/interviews/Interview.py @@ -279,6 +279,8 @@ async def _answer_question_and_record_task( reraise=True, ) async def attempt_answer(): + nonlocal had_language_model_no_response_error + invigilator = self._get_invigilator(question) if self._skip_this_question(question): @@ -322,11 +324,13 @@ async def attempt_answer(): f"Language model did not return a response for question '{question.question_name}.'" ) - # if it gets here, it means the no response error was fixed - if question.question_name in self.exceptions and had_language_model_no_response_error: + if ( + question.question_name in self.exceptions + and had_language_model_no_response_error + ): self.exceptions.record_fixed_question(question.question_name) - + return response try: @@ -379,7 +383,7 @@ def _handle_exception( ): import copy - #breakpoint() + # breakpoint() answers = copy.copy(self.answers) exception_entry = InterviewExceptionEntry( diff --git a/edsl/jobs/tasks/TaskHistory.py b/edsl/jobs/tasks/TaskHistory.py index 3b05e74a..1917aef9 100644 --- a/edsl/jobs/tasks/TaskHistory.py +++ b/edsl/jobs/tasks/TaskHistory.py @@ -54,7 +54,7 @@ def exceptions(self): def unfixed_exceptions(self): """ >>> len(TaskHistory.example().unfixed_exceptions) - 0 + 4 """ return [ i.exceptions diff --git a/edsl/questions/QuestionBudget.py b/edsl/questions/QuestionBudget.py index 3ac3daa7..8f8fe149 100644 --- 
a/edsl/questions/QuestionBudget.py +++ b/edsl/questions/QuestionBudget.py @@ -10,6 +10,7 @@ from edsl.questions.ResponseValidatorABC import ResponseValidatorABC + class BudgewResponseValidator(ResponseValidatorABC): valid_examples = [] @@ -25,19 +26,23 @@ def fix(self, response, verbose=False): if "comment" in response else {} ) - -def create_budget_model(budget_sum: float, permissive: bool, question_options: List[str]): + + +def create_budget_model( + budget_sum: float, permissive: bool, question_options: List[str] +): + class BudgetResponse(BaseModel): answer: List[float] = Field( ..., description="List of non-negative numbers representing budget allocation", min_items=len(question_options), - max_items=len(question_options) + max_items=len(question_options), ) comment: Optional[str] = None generated_tokens: Optional[str] = None - @validator('answer') + @validator("answer") def validate_answer(cls, v): if len(v) != len(question_options): raise ValueError(f"Must provide {len(question_options)} values") @@ -51,10 +56,11 @@ def validate_answer(cls, v): return v class Config: - extra = 'forbid' + extra = "forbid" return BudgetResponse + class QuestionBudget(QuestionBase): """This question prompts the agent to allocate a budget among options.""" @@ -92,9 +98,13 @@ def __init__( self.include_comment = include_comment def create_response_model(self): - return create_budget_model(self.budget_sum, self.permissive, self.question_options) + return create_budget_model( + self.budget_sum, self.permissive, self.question_options + ) - def _translate_answer_code_to_answer(self, answer_code, combined_dict) -> list[dict]: + def _translate_answer_code_to_answer( + self, answer_code, combined_dict + ) -> list[dict]: """ Translate the answer codes to the actual answers. 
@@ -216,4 +226,5 @@ def main(): # results = q.run() import doctest + doctest.testmod(optionflags=doctest.ELLIPSIS) diff --git a/tests/questions/test_QuestionBudget.py b/tests/questions/test_QuestionBudget.py index acdff71d..2b96f90f 100644 --- a/tests/questions/test_QuestionBudget.py +++ b/tests/questions/test_QuestionBudget.py @@ -5,8 +5,8 @@ from edsl.questions.QuestionBudget import QuestionBudget, main -def test_QuestionBudget_main(): - main() +# def test_QuestionBudget_main(): +# main() valid_question = { @@ -103,7 +103,8 @@ def test_QuestionBudget_construction(): def test_QuestionBudget_answers(): - valid_answer = {"answer": {"0": 25, "1": 25, "2": 25, "3": 25}, "comment": "Yum!"} + # valid_answer = {"answer": {"0": 25, "1": 25, "2": 25, "3": 25}, "comment": "Yum!"} + valid_answer = {"answer": [25, 25, 25, 25], "comment": "Yum!"} q = QuestionBudget(**valid_question) # answer must be an integer or interpretable as integer q._validate_answer(valid_answer) @@ -139,25 +140,25 @@ def test_QuestionBudget_extras(): q = QuestionBudget(**valid_question) # instructions # translate - assert q._translate_answer_code_to_answer({"0": 25, "1": 25, "2": 25, "3": 25}) == [ + assert q._translate_answer_code_to_answer([25, 25, 25, 25], {}) == [ {"Pizza": 25}, {"Ice Cream": 25}, {"Burgers": 25}, {"Salad": 25}, ] # _simulate_answer - assert q._simulate_answer().keys() == q._simulate_answer(human_readable=True).keys() - simulated_answer = q._simulate_answer(human_readable=False) - assert isinstance(simulated_answer, dict) - assert "answer" in simulated_answer - assert "comment" in simulated_answer - assert isinstance(simulated_answer["answer"], dict) - assert all( - [type(k) == int and k in range(len(q.question_options))] - for k in simulated_answer["answer"].keys() - ) - assert round(sum(simulated_answer["answer"].values())) == q.budget_sum - assert list(q._simulate_answer(human_readable=False)["answer"].keys()) == list( - range(len(q.question_options)) - ) - # form elements + # 
assert q._simulate_answer().keys() == q._simulate_answer(human_readable=True).keys() + # simulated_answer = q._simulate_answer(human_readable=False) + # assert isinstance(simulated_answer, dict) + # assert "answer" in simulated_answer + # assert "comment" in simulated_answer + # assert isinstance(simulated_answer["answer"], dict) + # assert all( + # [type(k) == int and k in range(len(q.question_options))] + # for k in simulated_answer["answer"].keys() + # ) + # assert round(sum(simulated_answer["answer"].values())) == q.budget_sum + # assert list(q._simulate_answer(human_readable=False)["answer"].keys()) == list( + # range(len(q.question_options)) + # ) + # # form elements From 4283af1754699668a36afc0b6cb5b8703c6c5565 Mon Sep 17 00:00:00 2001 From: John Horton Date: Mon, 16 Sep 2024 21:48:34 -0400 Subject: [PATCH 3/3] Comment option --- edsl/questions/templates/budget/answering_instructions.jinja | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/edsl/questions/templates/budget/answering_instructions.jinja b/edsl/questions/templates/budget/answering_instructions.jinja index b248847e..e91ed414 100644 --- a/edsl/questions/templates/budget/answering_instructions.jinja +++ b/edsl/questions/templates/budget/answering_instructions.jinja @@ -2,4 +2,6 @@ Return only a comma-separated list the values in the same order as the options, Example: if there are 4 options, the response should be "[25,25,25,25]" to allocate 25 to each option. -On the next line, return a text comment on your choice. +{% if include_comment %} +After the answer, you can put a comment explaining your choice on the next line. +{% endif %}