Merge pull request #32 from zbw/31-warnings-being-raised-in-the-unit-tests

Remove warnings from unit tests
zbw · Dec 4, 2024 · 363ab47
2 parents da88627 + 7c1226b
commit 363ab47
Show file tree

Hide file tree

Showing 14 changed files with 118 additions and 457 deletions.
diff --git a/CITATION.cff b/CITATION.cff
@@ -15,9 +15,9 @@ authors:
     affiliation: "ZBW - Leibniz Information Centre for Economics"
 title: "qualle (a framework to predict the quality of a multi-label classification result)"
 abstract: "This framework allows to train a model which can be used to predict the quality of the result of applying a multi-label classification (MLC) method on a document. In this implementation, only the recall is predicted for a document, but in principle any document-level quality estimation (such as the prediction of precision) can be implemented analogously."
-version: 0.3.1
+version: 0.3.2
 license: Apache-2.0
-date-released: 2024-10-10
+date-released: 2024-12-04
 repository-code: "https://github.com/zbw/qualle"
 contact:
   - name: "Automatization of subject indexing using methods from artificial intelligence (AutoSE)"

diff --git a/README.md b/README.md
@@ -60,21 +60,21 @@ By default, a container built from this image launches a REST interface listenin
 
 You need to pass the model file (see below the section REST interface) per bind mount or volume to the docker container.
 Beyond that, you need to specify the location of the model file with an
-environment variable named `MODEL_FILE`:
+environment variable named `MDL_FILE`:
 
-``docker run --rm -it --env MODEL_FILE=/model -v /path/to/model:/model -p 8000:8000 ghcr.io/zbw/qualle``
+``docker run --rm -it --env MDL_FILE=/model -v /path/to/model:/model -p 8000:8000 ghcr.io/zbw/qualle``
 
 [Gunicorn](https://gunicorn.org/) is used as HTTP Server. You can use the environment variable ``GUNICORN_CMD_ARGS`` to customize
 Gunicorn settings, such as the number of worker processes to use:
 
-``docker run --rm -it --env MODEL_FILE=/model --env GUNICORN_CMD_ARGS="--workers 4" -v /path/to/model:/model -p 8000:8000 ghcr.io/zbw/qualle``
+``docker run --rm -it --env MDL_FILE=/model --env GUNICORN_CMD_ARGS="--workers 4" -v /path/to/model:/model -p 8000:8000 ghcr.io/zbw/qualle``
 
 You can also use the Docker image to train or evaluate by using the Qualle command line tool:
 
 ```shell
 docker run --rm -it -v \
- /path/to/train_data_file:/train_data_file -v /path/to/model_dir:/model_dir ghcr.io/zbw/qualle \
- qualle train /train_data_file /model_dir/model
+ /path/to/train_data_file:/train_data_file -v /path/to/model_dir:/mdl_dir ghcr.io/zbw/qualle \
+ qualle train /train_data_file /mdl_dir/model
  ```
 
 The Qualle command line tool is not available for the release 0.1.0 and 0.1.1. For these releases,

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "qualle"
-version = "0.3.1-dev"
+version = "0.3.2-dev"
 description = "A framework to predict the quality of a multi-label classification result"
 authors = ["AutoSE <[email protected]>"]
 license = "Apache-2.0"
@@ -19,14 +19,14 @@ scikit-learn = "~1.5"
 rdflib = "7.0.*"
 pydantic = "~2.8"
 pydantic-settings = "~2.4"
-fastapi = {version = "~0.115", extras = ["standard"]}
+fastapi = "~0.115"
 uvicorn = "~0.22"
 
 
 [tool.poetry.group.dev.dependencies]
-pytest = "~7.2"
-pytest-mock = "~3.10"
-pytest-cov = "~4.0"
+pytest = "~7.4"
+pytest-mock = "~3.14"
+pytest-cov = "~6.0"
 httpx = "*"
 black = "^24.3"
 

diff --git a/qualle/interface/cli.py b/qualle/interface/cli.py
@@ -113,12 +113,12 @@ def handle_train(args: argparse.Namespace):
 
 
 def handle_eval(args: argparse.Namespace):
-    settings = EvalSettings(test_data_path=args.test_data_path, model_file=args.model)
+    settings = EvalSettings(test_data_path=args.test_data_path, mdl_file=args.model)
     evaluate(settings)
 
 
 def handle_rest(args: argparse.Namespace):
-    settings = RESTSettings(model_file=args.model, port=args.port[0], host=args.host[0])
+    settings = RESTSettings(mdl_file=args.model, port=args.port[0], host=args.host[0])
     run(settings)
 
 
@@ -132,7 +132,7 @@ def handle_predict(args: argparse.Namespace):
         )
     settings = PredictSettings(
         predict_data_path=predict_data_path,
-        model_file=args.model,
+        mdl_file=args.model,
         output_path=output_path,
     )
     predict(settings)

diff --git a/qualle/interface/config.py b/qualle/interface/config.py
@@ -81,12 +81,12 @@ class TrainSettings(BaseSettings):
 
 class EvalSettings(BaseSettings):
     test_data_path: FileOrDirPath
-    model_file: FilePath
+    mdl_file: FilePath
 
 
 class PredictSettings(BaseSettings):
     predict_data_path: FileOrDirPath
-    model_file: FilePath
+    mdl_file: FilePath
     output_path: Optional[Path] = None
 
     @model_validator(mode="after")
@@ -102,6 +102,6 @@ def check_output_path_specified_for_input_file(self):
 
 
 class RESTSettings(BaseSettings):
-    model_file: FilePath
+    mdl_file: FilePath
     port: int = 8000
     host: str = "127.0.0.1"
diff --git a/qualle/interface/internal.py b/qualle/interface/internal.py
@@ -149,7 +149,7 @@ def _get_class_from_str(fully_qualified_path: str) -> Type:
 def evaluate(settings: EvalSettings):
     logger = get_logger()
     path_to_test_data = settings.test_data_path
-    path_to_model_file = settings.model_file
+    path_to_model_file = settings.mdl_file
     model = load_model(str(path_to_model_file))
     logger.info("Run evaluation with model:\n%s", model)
     test_input = _load_train_input(path_to_test_data)
@@ -163,7 +163,7 @@ def evaluate(settings: EvalSettings):
 def predict(settings: PredictSettings):
     logger = get_logger()
     path_to_predict_data = settings.predict_data_path
-    path_to_model_file = settings.model_file
+    path_to_model_file = settings.mdl_file
     output_path = settings.output_path
     model = load_model(str(path_to_model_file))
     io_handler = _get_predict_io_handler(

diff --git a/qualle/interface/rest.py b/qualle/interface/rest.py
@@ -58,7 +58,7 @@ class QualityEstimation(BaseModel):
 @lru_cache
 def load_model() -> QualityEstimationPipeline:
     settings = RESTSettings()
-    return internal_load_model(str(settings.model_file))
+    return internal_load_model(str(settings.mdl_file))
 
 
 @router.post(
@@ -95,7 +95,7 @@ def create_app(settings: Optional[RESTSettings] = None):
     settings = settings or RESTSettings()
     app = FastAPI()
     app.include_router(router)
-    m = internal_load_model(str(settings.model_file))
+    m = internal_load_model(str(settings.mdl_file))
     app.dependency_overrides[load_model] = lambda: m
 
     return app

diff --git a/tests/interface/conftest.py b/tests/interface/conftest.py
@@ -22,7 +22,7 @@ def thsys_file_path(tmp_path):
 
 
 @pytest.fixture
-def model_path(tmp_path):
+def mdl_path(tmp_path):
     fp = tmp_path / "model"
     fp.write_text("")
     return fp
diff --git a/tests/interface/test_cli.py b/tests/interface/test_cli.py
@@ -216,65 +216,61 @@ def test_handle_train_creates_regressors(train_args_dict):
     )
 
 
-def test_handle_eval(tmp_path, model_path):
+def test_handle_eval(tmp_path, mdl_path):
     test_data_path = tmp_path / "testdata"
     test_data_path.mkdir()
-    handle_eval(Namespace(**dict(test_data_path=test_data_path, model=model_path)))
+    handle_eval(Namespace(**dict(test_data_path=test_data_path, model=mdl_path)))
     cli.evaluate.assert_called_once()
     actual_settings = cli.evaluate.call_args[0][0]
     assert actual_settings == EvalSettings(
-        test_data_path=test_data_path, model_file=model_path
+        test_data_path=test_data_path, mdl_file=mdl_path
     )
 
 
-def test_handle_rest(mocker, model_path):
+def test_handle_rest(mocker, mdl_path):
     m_run = mocker.Mock()
     mocker.patch("qualle.interface.cli.run", m_run)
 
-    cli.handle_rest(Namespace(**dict(model=model_path, port=[9000], host=["x"])))
+    cli.handle_rest(Namespace(**dict(model=mdl_path, port=[9000], host=["x"])))
 
-    m_run.assert_called_once_with(
-        RESTSettings(model_file=model_path, host="x", port=9000)
-    )
+    m_run.assert_called_once_with(RESTSettings(mdl_file=mdl_path, host="x", port=9000))
 
 
-def test_handle_predict_with_dir(tmp_path, model_path):
+def test_handle_predict_with_dir(tmp_path, mdl_path):
     predict_data_path = tmp_path / "predict"
     predict_data_path.mkdir()
     cli.handle_predict(
         Namespace(
-            **dict(predict_data_path=predict_data_path, model=model_path, output=None)
+            **dict(predict_data_path=predict_data_path, model=mdl_path, output=None)
         )
     )
     cli.predict.assert_called_once()
     actual_settings = cli.predict.call_args[0][0]
     assert actual_settings == PredictSettings(
-        predict_data_path=predict_data_path, model_file=model_path
+        predict_data_path=predict_data_path, mdl_file=mdl_path
     )
 
 
-def test_handle_predict_with_file(tsv_file_path, tmp_path, model_path):
+def test_handle_predict_with_file(tsv_file_path, tmp_path, mdl_path):
     output_path = tmp_path / "output.txt"
     cli.handle_predict(
         Namespace(
             **dict(
-                predict_data_path=tsv_file_path, model=model_path, output=[output_path]
+                predict_data_path=tsv_file_path, model=mdl_path, output=[output_path]
             )
         )
     )
     cli.predict.assert_called_once()
     actual_settings = cli.predict.call_args[0][0]
     assert actual_settings == PredictSettings(
-        predict_data_path=tsv_file_path, model_file=model_path, output_path=output_path
+        predict_data_path=tsv_file_path, mdl_file=mdl_path, output_path=output_path
     )
 
 
-def test_handle_predict_with_file_raises_exc_if_no_output_file(
-    tsv_file_path, model_path
-):
+def test_handle_predict_with_file_raises_exc_if_no_output_file(tsv_file_path, mdl_path):
     with pytest.raises(CliValidationError):
         cli.handle_predict(
             Namespace(
-                **dict(predict_data_path=tsv_file_path, model=model_path, output=None)
+                **dict(predict_data_path=tsv_file_path, model=mdl_path, output=None)
             )
         )
diff --git a/tests/interface/test_config.py b/tests/interface/test_config.py
@@ -24,20 +24,20 @@ def test_predict_settings_input_file_but_no_output_raises_exc(tmp_path):
     mp = tmp_path / "model"
     mp.write_text("modelInfo")
     with pytest.raises(ValidationError):
-        PredictSettings(predict_data_path=fp, model_file=mp)
+        PredictSettings(predict_data_path=fp, mdl_file=mp)
 
 
 def test_predict_settings_input_path_no_exc_1(tmp_path):
     fp = tmp_path / "data"
     fp.mkdir()
     mp = tmp_path / "model"
     mp.write_text("modelInfo")
-    PredictSettings(predict_data_path=fp, model_file=mp)
+    PredictSettings(predict_data_path=fp, mdl_file=mp)
 
 
 def test_predict_settings_input_path_no_exc_2(tmp_path):
     fp = tmp_path / "fp.tsv"
     fp.write_text("t\tc:0\tc")
     mp = tmp_path / "model"
     mp.write_text("modelInfo")
-    PredictSettings(predict_data_path=fp, model_file=mp, output_path=tmp_path)
+    PredictSettings(predict_data_path=fp, mdl_file=mp, output_path=tmp_path)
diff --git a/tests/interface/test_internal.py b/tests/interface/test_internal.py
@@ -239,14 +239,14 @@ def test_train_with_slc_uses_all_subthesauri_if_no_subthesauri_passed(
     )
 
 
-def test_evaluate(mocker, tsv_data_path, train_data, model_path):
+def test_evaluate(mocker, tsv_data_path, train_data, mdl_path):
     m_eval = mocker.Mock()
     m_eval.evaluate.return_value = {}
     m_eval_cls = mocker.Mock(return_value=m_eval)
     mocker.patch("qualle.interface.internal.Evaluator", m_eval_cls)
     internal.load.return_value = "testmodel"
 
-    settings = EvalSettings(test_data_path=tsv_data_path, model_file=model_path)
+    settings = EvalSettings(test_data_path=tsv_data_path, mdl_file=mdl_path)
     internal.evaluate(settings)
 
     m_eval_cls.assert_called_once_with(train_data, "testmodel")
@@ -276,10 +276,10 @@ def test_load_train_input_from_tsv(tsv_data_path, train_data):
     assert internal._load_train_input(tsv_data_path) == train_data
 
 
-def test_predict_stores_scores_from_model(tsv_data_path, tmp_path, model_path):
+def test_predict_stores_scores_from_model(tsv_data_path, tmp_path, mdl_path):
     output_path = tmp_path / "qualle.txt"
     settings = PredictSettings(
-        predict_data_path=tsv_data_path, model_file=model_path, output_path=output_path
+        predict_data_path=tsv_data_path, mdl_file=mdl_path, output_path=output_path
     )
     mock_model = internal.load.return_value
     mock_model.predict.side_effect = lambda p_data: map(lambda s: s[0], p_data.scores)
@@ -292,11 +292,11 @@ def test_predict_stores_scores_from_model(tsv_data_path, tmp_path, model_path):
 
 
 def test_predict_with_annif_data_stores_scores_from_model(
-    annif_data_dir, tmp_path, model_path
+    annif_data_dir, tmp_path, mdl_path
 ):
     settings = PredictSettings(
         predict_data_path=annif_data_dir,
-        model_file=model_path,
+        mdl_file=mdl_path,
     )
     mock_model = internal.load.return_value
     mock_model.predict.side_effect = lambda p_data: map(lambda s: s[0], p_data.scores)

diff --git a/tests/interface/test_rest.py b/tests/interface/test_rest.py
@@ -43,8 +43,8 @@ def mocked_pipeline(mocker):
 
 
 @pytest.fixture
-def client(mocked_pipeline, model_path):
-    app = create_app(RESTSettings(model_file=model_path))
+def client(mocked_pipeline, mdl_path):
+    app = create_app(RESTSettings(mdl_file=mdl_path))
     client = TestClient(app)
     return client
 
@@ -95,14 +95,14 @@ def test_return_http_200_for_up(client):
     assert resp.status_code == status.HTTP_200_OK
 
 
-def test_run(mocker, model_path):
+def test_run(mocker, mdl_path):
     m_app = mocker.Mock()
     m_create_app = mocker.Mock(return_value=m_app)
     mocker.patch("qualle.interface.rest.create_app", m_create_app)
     m_uvicorn_run = mocker.Mock()
     mocker.patch("qualle.interface.rest.uvicorn.run", m_uvicorn_run)
 
-    settings = RESTSettings(model_file=model_path)
+    settings = RESTSettings(mdl_file=mdl_path)
 
     run(settings)
 

diff --git a/tests/test_integration.py b/tests/test_integration.py
@@ -46,7 +46,7 @@ def train_data_file(tmp_path):
 
 
 @pytest.fixture
-def model_path(tmp_path):
+def mdl_path(tmp_path):
     return tmp_path / "output.model"
 
 
@@ -55,17 +55,17 @@ def predict_output_path(tmp_path):
     return tmp_path / "output.txt"
 
 
-def test_train_stores_model(train_data_file, model_path):
-    train(train_data_file, model_path)
-    assert model_path.is_file()
+def test_train_stores_model(train_data_file, mdl_path):
+    train(train_data_file, mdl_path)
+    assert mdl_path.is_file()
 
 
-def test_eval_prints_scores(train_data_file, model_path, caplog):
+def test_eval_prints_scores(train_data_file, mdl_path, caplog):
     caplog.set_level(logging.INFO)
 
-    train(train_data_file, model_path)
+    train(train_data_file, mdl_path)
 
-    settings = EvalSettings(test_data_path=train_data_file, model_file=model_path)
+    settings = EvalSettings(test_data_path=train_data_file, mdl_file=mdl_path)
     internal.evaluate(settings)
 
     assert "Scores:" in caplog.text
@@ -77,9 +77,9 @@ def test_eval_prints_scores(train_data_file, model_path, caplog):
     assert "correlation_coefficient: nan" in caplog.text
 
 
-def test_rest(train_data_file, model_path):
-    train(train_data_file, model_path)
-    settings = RESTSettings(model_file=model_path)
+def test_rest(train_data_file, mdl_path):
+    train(train_data_file, mdl_path)
+    settings = RESTSettings(mdl_file=mdl_path)
     app = create_app(settings)
     client = TestClient(app)
     res = client.post(
@@ -105,13 +105,13 @@ def test_rest(train_data_file, model_path):
 
 
 def test_predict_stores_quality_estimation(
-    train_data_file, model_path, predict_output_path
+    train_data_file, mdl_path, predict_output_path
 ):
-    train(train_data_file, model_path)
+    train(train_data_file, mdl_path)
 
     settings = PredictSettings(
         predict_data_path=train_data_file,
-        model_file=model_path,
+        mdl_file=mdl_path,
         output_path=predict_output_path,
     )
     internal.predict(settings)