Skip to content

Commit

Permalink
[unit test] Adding unit test for metrics.get_accuracy (openai#224)
Browse files Browse the repository at this point in the history
Adds a first unit test to get the ball rolling, starting with metrics since
they are fundamental to evaluating performance. :) It would be great to
add more tests as the project is built out, and also to enable CI (e.g.,
via GitHub Actions).

This also fixes the unused `num_samples` parameter of `get_bootstrap_accuracy_std`, which previously hard-coded 1000 iterations.
  • Loading branch information
kjbilton authored Jun 2, 2023
1 parent cde88c0 commit 36c2c74
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 1 deletion.
2 changes: 1 addition & 1 deletion evals/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def get_accuracy(events: Sequence[Event]) -> float:

def get_bootstrap_accuracy_std(events: "Sequence[Event]", num_samples: int = 1000):
    """Estimate the spread of the accuracy by repeated random subsampling.

    Each of the ``num_samples`` rounds draws half of the events (without
    replacement, via ``random.sample``), averages their ``"correct"`` values,
    and the standard deviation of those per-round means is returned.

    Args:
        events: Events whose ``data["correct"]`` entry holds the outcome.
        num_samples: Number of subsampling rounds to run. Defaults to 1000.

    Returns:
        ``np.std`` of the per-round mean accuracies.
    """
    vals = [m.data["correct"] for m in events]
    # Each round uses half of the events (rounded down), so fewer than two
    # events yields empty subsamples.
    half = len(vals) // 2
    # Honor ``num_samples``: the round count was previously hard-coded to
    # 1000, which silently ignored the caller's argument.
    return np.std([np.mean(random.sample(vals, half)) for _ in range(num_samples)])


def get_confusion_matrix(
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ dependencies = [
"pyyaml",
"sacrebleu",
"matplotlib",
"pytest",
"setuptools_scm",
"langchain"
]
Expand Down
24 changes: 24 additions & 0 deletions tests/unit/evals/test_metrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import List
from unittest.mock import MagicMock

import numpy as np
import pytest

from evals import metrics


@pytest.mark.parametrize(
    "event_labels, expected",
    [
        ([True, True], 1.0),
        # Use the exact fraction: a rounded 0.333 sits right on the edge of
        # the rtol=1e-3 tolerance band and makes the check fragile.
        ([True, False, False], 1 / 3),
        ([False, False], 0.0),
        # No events: the result is expected to be NaN (assert_allclose treats
        # NaN == NaN as a match by default via equal_nan=True).
        ([], np.nan),
    ],
)
def test_get_accuracy(
    event_labels: List[bool],
    expected: float,
) -> None:
    """Check ``metrics.get_accuracy`` against hand-computed accuracies.

    Each label becomes a mock event exposing ``data["correct"]``, the only
    attribute this test sets on the events it passes in.
    """
    events = [MagicMock(data={"correct": value}) for value in event_labels]
    actual = metrics.get_accuracy(events)
    # assert_allclose takes (actual, desired); the relative tolerance is
    # scaled by ``desired``, so the expected value must come second.
    np.testing.assert_allclose(actual, expected, rtol=1e-3)

0 comments on commit 36c2c74

Please sign in to comment.