Skip to content

Commit

Permalink
Merge pull request #32 from zbw/31-warnings-being-raised-in-the-unit-…
Browse files Browse the repository at this point in the history
…tests

Remove warnings from unit tests
  • Loading branch information
gmmajal authored Dec 4, 2024
2 parents da88627 + 7c1226b commit 363ab47
Show file tree
Hide file tree
Showing 14 changed files with 118 additions and 457 deletions.
4 changes: 2 additions & 2 deletions CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ authors:
affiliation: "ZBW - Leibniz Information Centre for Economics"
title: "qualle (a framework to predict the quality of a multi-label classification result)"
abstract: "This framework allows training a model that can be used to predict the quality of the result of applying a multi-label classification (MLC) method to a document. In this implementation, only the recall is predicted for a document, but in principle any document-level quality estimation (such as the prediction of precision) can be implemented analogously."
version: 0.3.1
version: 0.3.2
license: Apache-2.0
date-released: 2024-10-10
date-released: 2024-12-04
repository-code: "https://github.com/zbw/qualle"
contact:
- name: "Automatization of subject indexing using methods from artificial intelligence (AutoSE)"
Expand Down
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,21 @@ By default, a container built from this image launches a REST interface listenin

You need to pass the model file (see the section REST interface below) to the Docker container via a bind mount or volume.
Beyond that, you need to specify the location of the model file with an
environment variable named `MODEL_FILE`:
environment variable named `MDL_FILE`:

``docker run --rm -it --env MODEL_FILE=/model -v /path/to/model:/model -p 8000:8000 ghcr.io/zbw/qualle``
``docker run --rm -it --env MDL_FILE=/model -v /path/to/model:/model -p 8000:8000 ghcr.io/zbw/qualle``

[Gunicorn](https://gunicorn.org/) is used as HTTP Server. You can use the environment variable ``GUNICORN_CMD_ARGS`` to customize
Gunicorn settings, such as the number of worker processes to use:

``docker run --rm -it --env MODEL_FILE=/model --env GUNICORN_CMD_ARGS="--workers 4" -v /path/to/model:/model -p 8000:8000 ghcr.io/zbw/qualle``
``docker run --rm -it --env MDL_FILE=/model --env GUNICORN_CMD_ARGS="--workers 4" -v /path/to/model:/model -p 8000:8000 ghcr.io/zbw/qualle``

You can also use the Docker image to train or evaluate by using the Qualle command line tool:

```shell
docker run --rm -it -v \
/path/to/train_data_file:/train_data_file -v /path/to/model_dir:/model_dir ghcr.io/zbw/qualle \
qualle train /train_data_file /model_dir/model
/path/to/train_data_file:/train_data_file -v /path/to/model_dir:/mdl_dir ghcr.io/zbw/qualle \
qualle train /train_data_file /mdl_dir/model
```

The Qualle command line tool is not available for releases 0.1.0 and 0.1.1. For these releases,
Expand Down
445 changes: 55 additions & 390 deletions poetry.lock

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "qualle"
version = "0.3.1-dev"
version = "0.3.2-dev"
description = "A framework to predict the quality of a multi-label classification result"
authors = ["AutoSE <[email protected]>"]
license = "Apache-2.0"
Expand All @@ -19,14 +19,14 @@ scikit-learn = "~1.5"
rdflib = "7.0.*"
pydantic = "~2.8"
pydantic-settings = "~2.4"
fastapi = {version = "~0.115", extras = ["standard"]}
fastapi = "~0.115"
uvicorn = "~0.22"


[tool.poetry.group.dev.dependencies]
pytest = "~7.2"
pytest-mock = "~3.10"
pytest-cov = "~4.0"
pytest = "~7.4"
pytest-mock = "~3.14"
pytest-cov = "~6.0"
httpx = "*"
black = "^24.3"

Expand Down
6 changes: 3 additions & 3 deletions qualle/interface/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,12 +113,12 @@ def handle_train(args: argparse.Namespace):


def handle_eval(args: argparse.Namespace):
settings = EvalSettings(test_data_path=args.test_data_path, model_file=args.model)
settings = EvalSettings(test_data_path=args.test_data_path, mdl_file=args.model)
evaluate(settings)


def handle_rest(args: argparse.Namespace):
settings = RESTSettings(model_file=args.model, port=args.port[0], host=args.host[0])
settings = RESTSettings(mdl_file=args.model, port=args.port[0], host=args.host[0])
run(settings)


Expand All @@ -132,7 +132,7 @@ def handle_predict(args: argparse.Namespace):
)
settings = PredictSettings(
predict_data_path=predict_data_path,
model_file=args.model,
mdl_file=args.model,
output_path=output_path,
)
predict(settings)
Expand Down
6 changes: 3 additions & 3 deletions qualle/interface/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,12 @@ class TrainSettings(BaseSettings):

class EvalSettings(BaseSettings):
test_data_path: FileOrDirPath
model_file: FilePath
mdl_file: FilePath


class PredictSettings(BaseSettings):
predict_data_path: FileOrDirPath
model_file: FilePath
mdl_file: FilePath
output_path: Optional[Path] = None

@model_validator(mode="after")
Expand All @@ -102,6 +102,6 @@ def check_output_path_specified_for_input_file(self):


class RESTSettings(BaseSettings):
model_file: FilePath
mdl_file: FilePath
port: int = 8000
host: str = "127.0.0.1"
4 changes: 2 additions & 2 deletions qualle/interface/internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def _get_class_from_str(fully_qualified_path: str) -> Type:
def evaluate(settings: EvalSettings):
logger = get_logger()
path_to_test_data = settings.test_data_path
path_to_model_file = settings.model_file
path_to_model_file = settings.mdl_file
model = load_model(str(path_to_model_file))
logger.info("Run evaluation with model:\n%s", model)
test_input = _load_train_input(path_to_test_data)
Expand All @@ -163,7 +163,7 @@ def evaluate(settings: EvalSettings):
def predict(settings: PredictSettings):
logger = get_logger()
path_to_predict_data = settings.predict_data_path
path_to_model_file = settings.model_file
path_to_model_file = settings.mdl_file
output_path = settings.output_path
model = load_model(str(path_to_model_file))
io_handler = _get_predict_io_handler(
Expand Down
4 changes: 2 additions & 2 deletions qualle/interface/rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ class QualityEstimation(BaseModel):
@lru_cache
def load_model() -> QualityEstimationPipeline:
settings = RESTSettings()
return internal_load_model(str(settings.model_file))
return internal_load_model(str(settings.mdl_file))


@router.post(
Expand Down Expand Up @@ -95,7 +95,7 @@ def create_app(settings: Optional[RESTSettings] = None):
settings = settings or RESTSettings()
app = FastAPI()
app.include_router(router)
m = internal_load_model(str(settings.model_file))
m = internal_load_model(str(settings.mdl_file))
app.dependency_overrides[load_model] = lambda: m

return app
Expand Down
2 changes: 1 addition & 1 deletion tests/interface/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def thsys_file_path(tmp_path):


@pytest.fixture
def model_path(tmp_path):
def mdl_path(tmp_path):
fp = tmp_path / "model"
fp.write_text("")
return fp
32 changes: 14 additions & 18 deletions tests/interface/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,65 +216,61 @@ def test_handle_train_creates_regressors(train_args_dict):
)


def test_handle_eval(tmp_path, model_path):
def test_handle_eval(tmp_path, mdl_path):
test_data_path = tmp_path / "testdata"
test_data_path.mkdir()
handle_eval(Namespace(**dict(test_data_path=test_data_path, model=model_path)))
handle_eval(Namespace(**dict(test_data_path=test_data_path, model=mdl_path)))
cli.evaluate.assert_called_once()
actual_settings = cli.evaluate.call_args[0][0]
assert actual_settings == EvalSettings(
test_data_path=test_data_path, model_file=model_path
test_data_path=test_data_path, mdl_file=mdl_path
)


def test_handle_rest(mocker, model_path):
def test_handle_rest(mocker, mdl_path):
m_run = mocker.Mock()
mocker.patch("qualle.interface.cli.run", m_run)

cli.handle_rest(Namespace(**dict(model=model_path, port=[9000], host=["x"])))
cli.handle_rest(Namespace(**dict(model=mdl_path, port=[9000], host=["x"])))

m_run.assert_called_once_with(
RESTSettings(model_file=model_path, host="x", port=9000)
)
m_run.assert_called_once_with(RESTSettings(mdl_file=mdl_path, host="x", port=9000))


def test_handle_predict_with_dir(tmp_path, model_path):
def test_handle_predict_with_dir(tmp_path, mdl_path):
predict_data_path = tmp_path / "predict"
predict_data_path.mkdir()
cli.handle_predict(
Namespace(
**dict(predict_data_path=predict_data_path, model=model_path, output=None)
**dict(predict_data_path=predict_data_path, model=mdl_path, output=None)
)
)
cli.predict.assert_called_once()
actual_settings = cli.predict.call_args[0][0]
assert actual_settings == PredictSettings(
predict_data_path=predict_data_path, model_file=model_path
predict_data_path=predict_data_path, mdl_file=mdl_path
)


def test_handle_predict_with_file(tsv_file_path, tmp_path, model_path):
def test_handle_predict_with_file(tsv_file_path, tmp_path, mdl_path):
output_path = tmp_path / "output.txt"
cli.handle_predict(
Namespace(
**dict(
predict_data_path=tsv_file_path, model=model_path, output=[output_path]
predict_data_path=tsv_file_path, model=mdl_path, output=[output_path]
)
)
)
cli.predict.assert_called_once()
actual_settings = cli.predict.call_args[0][0]
assert actual_settings == PredictSettings(
predict_data_path=tsv_file_path, model_file=model_path, output_path=output_path
predict_data_path=tsv_file_path, mdl_file=mdl_path, output_path=output_path
)


def test_handle_predict_with_file_raises_exc_if_no_output_file(
tsv_file_path, model_path
):
def test_handle_predict_with_file_raises_exc_if_no_output_file(tsv_file_path, mdl_path):
with pytest.raises(CliValidationError):
cli.handle_predict(
Namespace(
**dict(predict_data_path=tsv_file_path, model=model_path, output=None)
**dict(predict_data_path=tsv_file_path, model=mdl_path, output=None)
)
)
6 changes: 3 additions & 3 deletions tests/interface/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,20 @@ def test_predict_settings_input_file_but_no_output_raises_exc(tmp_path):
mp = tmp_path / "model"
mp.write_text("modelInfo")
with pytest.raises(ValidationError):
PredictSettings(predict_data_path=fp, model_file=mp)
PredictSettings(predict_data_path=fp, mdl_file=mp)


def test_predict_settings_input_path_no_exc_1(tmp_path):
fp = tmp_path / "data"
fp.mkdir()
mp = tmp_path / "model"
mp.write_text("modelInfo")
PredictSettings(predict_data_path=fp, model_file=mp)
PredictSettings(predict_data_path=fp, mdl_file=mp)


def test_predict_settings_input_path_no_exc_2(tmp_path):
fp = tmp_path / "fp.tsv"
fp.write_text("t\tc:0\tc")
mp = tmp_path / "model"
mp.write_text("modelInfo")
PredictSettings(predict_data_path=fp, model_file=mp, output_path=tmp_path)
PredictSettings(predict_data_path=fp, mdl_file=mp, output_path=tmp_path)
12 changes: 6 additions & 6 deletions tests/interface/test_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,14 @@ def test_train_with_slc_uses_all_subthesauri_if_no_subthesauri_passed(
)


def test_evaluate(mocker, tsv_data_path, train_data, model_path):
def test_evaluate(mocker, tsv_data_path, train_data, mdl_path):
m_eval = mocker.Mock()
m_eval.evaluate.return_value = {}
m_eval_cls = mocker.Mock(return_value=m_eval)
mocker.patch("qualle.interface.internal.Evaluator", m_eval_cls)
internal.load.return_value = "testmodel"

settings = EvalSettings(test_data_path=tsv_data_path, model_file=model_path)
settings = EvalSettings(test_data_path=tsv_data_path, mdl_file=mdl_path)
internal.evaluate(settings)

m_eval_cls.assert_called_once_with(train_data, "testmodel")
Expand Down Expand Up @@ -276,10 +276,10 @@ def test_load_train_input_from_tsv(tsv_data_path, train_data):
assert internal._load_train_input(tsv_data_path) == train_data


def test_predict_stores_scores_from_model(tsv_data_path, tmp_path, model_path):
def test_predict_stores_scores_from_model(tsv_data_path, tmp_path, mdl_path):
output_path = tmp_path / "qualle.txt"
settings = PredictSettings(
predict_data_path=tsv_data_path, model_file=model_path, output_path=output_path
predict_data_path=tsv_data_path, mdl_file=mdl_path, output_path=output_path
)
mock_model = internal.load.return_value
mock_model.predict.side_effect = lambda p_data: map(lambda s: s[0], p_data.scores)
Expand All @@ -292,11 +292,11 @@ def test_predict_stores_scores_from_model(tsv_data_path, tmp_path, model_path):


def test_predict_with_annif_data_stores_scores_from_model(
annif_data_dir, tmp_path, model_path
annif_data_dir, tmp_path, mdl_path
):
settings = PredictSettings(
predict_data_path=annif_data_dir,
model_file=model_path,
mdl_file=mdl_path,
)
mock_model = internal.load.return_value
mock_model.predict.side_effect = lambda p_data: map(lambda s: s[0], p_data.scores)
Expand Down
8 changes: 4 additions & 4 deletions tests/interface/test_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def mocked_pipeline(mocker):


@pytest.fixture
def client(mocked_pipeline, model_path):
app = create_app(RESTSettings(model_file=model_path))
def client(mocked_pipeline, mdl_path):
app = create_app(RESTSettings(mdl_file=mdl_path))
client = TestClient(app)
return client

Expand Down Expand Up @@ -95,14 +95,14 @@ def test_return_http_200_for_up(client):
assert resp.status_code == status.HTTP_200_OK


def test_run(mocker, model_path):
def test_run(mocker, mdl_path):
m_app = mocker.Mock()
m_create_app = mocker.Mock(return_value=m_app)
mocker.patch("qualle.interface.rest.create_app", m_create_app)
m_uvicorn_run = mocker.Mock()
mocker.patch("qualle.interface.rest.uvicorn.run", m_uvicorn_run)

settings = RESTSettings(model_file=model_path)
settings = RESTSettings(mdl_file=mdl_path)

run(settings)

Expand Down
26 changes: 13 additions & 13 deletions tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def train_data_file(tmp_path):


@pytest.fixture
def model_path(tmp_path):
def mdl_path(tmp_path):
return tmp_path / "output.model"


Expand All @@ -55,17 +55,17 @@ def predict_output_path(tmp_path):
return tmp_path / "output.txt"


def test_train_stores_model(train_data_file, model_path):
train(train_data_file, model_path)
assert model_path.is_file()
def test_train_stores_model(train_data_file, mdl_path):
train(train_data_file, mdl_path)
assert mdl_path.is_file()


def test_eval_prints_scores(train_data_file, model_path, caplog):
def test_eval_prints_scores(train_data_file, mdl_path, caplog):
caplog.set_level(logging.INFO)

train(train_data_file, model_path)
train(train_data_file, mdl_path)

settings = EvalSettings(test_data_path=train_data_file, model_file=model_path)
settings = EvalSettings(test_data_path=train_data_file, mdl_file=mdl_path)
internal.evaluate(settings)

assert "Scores:" in caplog.text
Expand All @@ -77,9 +77,9 @@ def test_eval_prints_scores(train_data_file, model_path, caplog):
assert "correlation_coefficient: nan" in caplog.text


def test_rest(train_data_file, model_path):
train(train_data_file, model_path)
settings = RESTSettings(model_file=model_path)
def test_rest(train_data_file, mdl_path):
train(train_data_file, mdl_path)
settings = RESTSettings(mdl_file=mdl_path)
app = create_app(settings)
client = TestClient(app)
res = client.post(
Expand All @@ -105,13 +105,13 @@ def test_rest(train_data_file, model_path):


def test_predict_stores_quality_estimation(
train_data_file, model_path, predict_output_path
train_data_file, mdl_path, predict_output_path
):
train(train_data_file, model_path)
train(train_data_file, mdl_path)

settings = PredictSettings(
predict_data_path=train_data_file,
model_file=model_path,
mdl_file=mdl_path,
output_path=predict_output_path,
)
internal.predict(settings)
Expand Down

0 comments on commit 363ab47

Please sign in to comment.