From e3d02d6d7d9f32ae6374379c00850fd5657d7386 Mon Sep 17 00:00:00 2001 From: Kirupa Gunaseelan Date: Wed, 24 Jul 2024 11:30:52 -0700 Subject: [PATCH] fix: smaller dataset for integ tests to reduce runtime --- .../datasets/triviaQA_sample_small.jsonl | 8 ++++++++ test/integration/test_qa_accuracy.py | 17 ++++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) create mode 100644 test/integration/datasets/triviaQA_sample_small.jsonl diff --git a/test/integration/datasets/triviaQA_sample_small.jsonl b/test/integration/datasets/triviaQA_sample_small.jsonl new file mode 100644 index 00000000..9e0563c7 --- /dev/null +++ b/test/integration/datasets/triviaQA_sample_small.jsonl @@ -0,0 +1,8 @@ +{"question":"Which american-born sinclair won the nobel prize for literature in 1930?","answer":"Harry Sinclair LewisGrace HeggerSinclair Lewislewis harry sinclair"} +{"question":"Where in england was dame judi dench born?","answer":"Park Grove (1895)York UAUN\/LOCODE:GBYRKYork, UKCity of YorkYork, EnglandYORKEoferwicYork"} +{"question":"In which decade did billboard magazine first publish and american hit chart?","answer":"30's30\u2019s30s"} +{"question":"Which city does david soul come from?","answer":"ChicagoChicago, IllinoisHog Butcher for the WorldChicago, Illinois, U.S.A.The city of ChicagoChi town"} +{"question":"Who won super bowl xx?","answer":"Chicago Bearschicago staleyschicago gatorsdecatur staleys"} +{"question":"Which was the first european country to abolish capital punishment?","answer":"Norv\u00e8geMainland NorwayNorwayrepublic of norway"} +{"question":"What is bruce willis' real first name?","answer":"Walterwalter disambiguationwalter tv series"} +{"question":"Which william wrote the novel lord of the flies?","answer":"GoldingWilliam Golding"} diff --git a/test/integration/test_qa_accuracy.py b/test/integration/test_qa_accuracy.py index 58328f48..a95d9a53 100644 --- a/test/integration/test_qa_accuracy.py +++ b/test/integration/test_qa_accuracy.py @@ -47,8 +47,8 @@ def test_evaluate_sample(self): def test_evaluate(self, integration_tests_dir): dataset_config = DataConfig( - dataset_name="triviaQA_sample", - dataset_uri=os.path.join(integration_tests_dir, "datasets", "triviaQA_sample.jsonl"), + dataset_name="triviaQA_sample_small", + dataset_uri=os.path.join(integration_tests_dir, "datasets", "triviaQA_sample_small.jsonl"), dataset_mime_type=MIME_TYPE_JSONLINES, model_input_location="question", target_output_location="answer", @@ -58,18 +58,17 @@ def test_evaluate(self, integration_tests_dir): dataset_config=dataset_config, prompt_template=js_model_runner_prompt_template, save=True, - num_records=20, )[0] for eval_score in eval_output.dataset_scores: if eval_score.name == F1_SCORE: # pragma: no branch - assert eval_score.value == approx(0.366667, abs=ABS_TOL) + assert eval_score.value == approx(0.25, abs=ABS_TOL) elif eval_score.name == EXACT_MATCH_SCORE: - assert eval_score.value == approx(0.050000, abs=ABS_TOL) + assert eval_score.value == approx(0.0, abs=ABS_TOL) elif eval_score.name == QUASI_EXACT_MATCH_SCORE: - assert eval_score.value == approx(0.300000, abs=ABS_TOL) + assert eval_score.value == approx(0.25, abs=ABS_TOL) elif eval_score.name == PRECISION_OVER_WORDS: - assert eval_score.value == approx(0.375000, abs=ABS_TOL) + assert eval_score.value == approx(0.25, abs=ABS_TOL) elif eval_score.name == RECALL_OVER_WORDS: - assert eval_score.value == approx(0.375000, abs=ABS_TOL) + assert eval_score.value == approx(0.25, abs=ABS_TOL) elif eval_score.name == BERT_SCORE: - assert eval_score.value == approx(0.721095, abs=ABS_TOL) + assert eval_score.value == approx(0.7721082419157028, abs=ABS_TOL)