Skip to content

Commit

Permalink
Merge branch 'main' into feature/add-error-collector-to-iso3-codes-va…
Browse files Browse the repository at this point in the history
…lidator
  • Loading branch information
dc-almeida committed Sep 17, 2024
2 parents 5f99c87 + 367e10c commit 1e9faca
Show file tree
Hide file tree
Showing 10 changed files with 101 additions and 45 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
shell: bash
strategy:
matrix:
os: ["macos", "ubuntu"]
os: ["macos", "ubuntu", "windows"]
# keep consistent with py-version badge in README.md and docs/index.rst
python-version: ["3.10", "3.11", "3.12"]
fail-fast: false
Expand Down Expand Up @@ -44,10 +44,10 @@ jobs:
# load cached venv if cache exists
#----------------------------------------------
- name: Load cached venv
id: cached-poetry-dependencies
id: cached-pip-wheels
uses: actions/cache@v4
with:
path: .venv
path: ~/.cache
key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
#----------------------------------------------
# install dependencies if cache does not exist
Expand Down
7 changes: 0 additions & 7 deletions nomenclature/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,6 @@

import yaml


def log_error(dimension, error_list):
    """Compile an error message and write to log

    Parameters
    ----------
    dimension : str
        Name of the dimension, inserted into the message text.
    error_list : iterable
        Items that are not defined in the codelist; each one is converted
        with ``str`` and written on its own bullet line.
    """
    msg = f"The following {dimension}(s) are not defined in the {dimension} codelist:"
    # Unpack rather than concatenate so that tuples, sets or dict views are
    # accepted as well as lists.
    logging.error("\n - ".join(map(str, [msg, *error_list])))


from nomenclature.cli import cli # noqa
from nomenclature.codelist import CodeList # noqa
from nomenclature.core import process # noqa
Expand Down
40 changes: 31 additions & 9 deletions nomenclature/codelist.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
from pydantic_core import PydanticCustomError

import nomenclature
from nomenclature import log_error
from nomenclature.code import Code, MetaCode, RegionCode, VariableCode
from nomenclature.config import CodeListConfig, NomenclatureConfig
from nomenclature.error import ErrorCollector, custom_pydantic_errors
from nomenclature.error import ErrorCollector, custom_pydantic_errors, log_error

here = Path(__file__).parent.absolute()

Expand Down Expand Up @@ -98,9 +97,14 @@ def keys(self):
def values(self):
return self.mapping.values()

def validate_data(self, df: IamDataFrame, dimension: str) -> bool:
def validate_data(
self,
df: IamDataFrame,
dimension: str,
project: str | None = None,
) -> bool:
if invalid := self.validate_items(getattr(df, dimension)):
log_error(dimension, invalid)
log_error(dimension, invalid, project)
return False
return True

Expand Down Expand Up @@ -600,7 +604,11 @@ def vars_kwargs(self, variables: List[str]) -> List[VariableCode]:
if self[var].agg_kwargs and not self[var].skip_region_aggregation
]

def validate_units(self, unit_mapping) -> bool:
def validate_units(
self,
unit_mapping,
project: None | str = None,
) -> bool:
if invalid_units := [
(variable, unit, self.mapping[variable].unit)
for variable, unit in unit_mapping.items()
Expand All @@ -613,14 +621,28 @@ def validate_units(self, unit_mapping) -> bool:
for v, u, e in invalid_units
]
msg = "The following variable(s) are reported with the wrong unit:"
logging.error("\n - ".join([msg] + lst))
file_service_address = "https://files.ece.iiasa.ac.at"
logging.error(
"\n - ".join([msg] + lst)
+ (
f"\n\nPlease refer to {file_service_address}/{project}/"
f"{project}-template.xlsx for the list of allowed units."
if project is not None
else ""
)
)
return False
return True

def validate_data(self, df: IamDataFrame, dimension: str) -> bool:
def validate_data(
self,
df: IamDataFrame,
dimension: str,
project: str | None = None,
) -> bool:
# validate variables
all_variables_valid = super().validate_data(df, dimension)
all_units_valid = self.validate_units(df.unit_mapping)
all_variables_valid = super().validate_data(df, dimension, project)
all_units_valid = self.validate_units(df.unit_mapping, project)
return all_variables_valid and all_units_valid

def list_missing_variables(
Expand Down
8 changes: 7 additions & 1 deletion nomenclature/definition.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ def __init__(self, path, dimensions=None):
path = Path(path)

self.project_folder = path.parent
self.project = self.project_folder.name.split("-workflow")[0]

if (file := self.project_folder / "nomenclature.yaml").exists():
self.config = NomenclatureConfig.from_file(file=file)
Expand Down Expand Up @@ -98,7 +99,12 @@ def validate(self, df: IamDataFrame, dimensions: list | None = None) -> None:
"""

if any(
getattr(self, dimension).validate_data(df, dimension) is False
getattr(self, dimension).validate_data(
df,
dimension,
self.project,
)
is False
for dimension in (dimensions or self.dimensions)
):
raise ValueError("The validation failed. Please check the log for details.")
Expand Down
21 changes: 21 additions & 0 deletions nomenclature/error.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import textwrap
from collections import namedtuple
from typing import Optional
Expand Down Expand Up @@ -71,3 +72,23 @@ def __repr__(self) -> str:

def __bool__(self) -> bool:
return bool(self.errors)


def log_error(
dimension: str,
error_list,
project: str | None = None,
) -> None:
"""Compile an error message and write to log"""
file_service_address = "https://files.ece.iiasa.ac.at"
msg = f"The following {dimension}(s) are not defined in the {dimension} codelist:"

logging.error(
"\n - ".join(map(str, [msg] + error_list))
+ (
f"\n\nPlease refer to {file_service_address}/{project}/{project}"
f"-template.xlsx for the list of allowed {dimension}s."
if project is not None
else ""
)
)
3 changes: 1 addition & 2 deletions nomenclature/processor/region.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,9 @@
from pydantic.types import DirectoryPath, FilePath
from pydantic_core import PydanticCustomError

from nomenclature import log_error
from nomenclature.codelist import RegionCodeList, VariableCodeList
from nomenclature.definition import DataStructureDefinition
from nomenclature.error import custom_pydantic_errors, ErrorCollector
from nomenclature.error import custom_pydantic_errors, ErrorCollector, log_error
from nomenclature.processor import Processor
from nomenclature.processor.utils import get_relative_path

Expand Down
9 changes: 8 additions & 1 deletion nomenclature/processor/required_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,13 @@ def apply(self, df: IamDataFrame) -> IamDataFrame:
for model, data_list in missing_data.items():
missing_data_log_info += f"Missing for '{model}':\n"
for data in data_list:
missing_data_log_info += f"{data}\n\n"
missing_data_log_info += (
data.to_string(
index=False,
justify="left",
)
+ "\n\n"
)
logger.error(
"Missing required data.\nFile: %s\n\n%s",
get_relative_path(self.file),
Expand Down Expand Up @@ -212,6 +218,7 @@ def check_required_data_per_model(
.to_frame()
.reset_index()
.drop(columns=["model"])
.rename(columns={"year": "year(s)"})
)
return missing_data

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
model: model_a
required_data:
- measurand:
- Primary Energy:
- Primary Energy|Making sure that a really long variable is displayed completely:
unit: [GWh/yr, Mtoe]
year: [2005, 2010, 2015] # 2015 is missing from simple_df for all models
- variable: Final Energy
Expand Down
32 changes: 14 additions & 18 deletions tests/test_required_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,24 +100,20 @@ def test_RequiredData_apply_raises(simple_df, caplog):
required_data_validator.apply(simple_df)

missing_data = [
"""
scenario variable unit year
0 scen_a Primary Energy GWh/yr 2005,2010,2015
1 scen_a Primary Energy Mtoe 2005,2010,2015
2 scen_b Primary Energy GWh/yr 2005,2010,2015
3 scen_b Primary Energy Mtoe 2005,2010,2015""",
"""
scenario variable
0 scen_a Final Energy
1 scen_b Final Energy""",
"""
scenario variable unit
0 scen_a Emissions|CO2 Mt CO2/yr
1 scen_b Emissions|CO2 Mt CO2/yr""",
"""
scenario region variable
0 scen_a World Final Energy
1 scen_b World Final Energy""",
"""scenario variable unit year(s)""",
"""scen_a Primary Energy|Making sure that a really long variable is displayed completely GWh/yr 2005,2010,2015
scen_a Primary Energy|Making sure that a really long variable is displayed completely Mtoe 2005,2010,2015
scen_b Primary Energy|Making sure that a really long variable is displayed completely GWh/yr 2005,2010,2015
scen_b Primary Energy|Making sure that a really long variable is displayed completely Mtoe 2005,2010,2015""",
"""scenario variable""",
"""scen_a Final Energy
scen_b Final Energy""",
"""scenario variable unit""",
"""scen_a Emissions|CO2 Mt CO2/yr
scen_b Emissions|CO2 Mt CO2/yr""",
"""scenario region variable""",
"""scen_a World Final Energy
scen_b World Final Energy""",
]
# check if the log message contains the correct information
assert all(
Expand Down
18 changes: 15 additions & 3 deletions tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,28 +28,40 @@ def test_validation_brackets(extras_definition, simple_df):
extras_definition.validate(simple_df)


def test_validation_fails_variable(simple_definition, simple_df):
def test_validation_fails_variable(simple_definition, simple_df, caplog):
"""Changing a variable name raises"""
simple_df.rename(variable={"Primary Energy": "foo"}, inplace=True)

with pytest.raises(ValueError, match=MATCH_FAIL_VALIDATION):
simple_definition.validate(simple_df)
assert (
"Please refer to https://files.ece.iiasa.ac.at/data/data-template.xlsx"
" for the list of allowed variables." in caplog.text
)


def test_validation_fails_unit(simple_definition, simple_df):
def test_validation_fails_unit(simple_definition, simple_df, caplog):
"""Changing a unit raises"""
simple_df.rename(unit={"EJ/yr": "GWh/yr"}, inplace=True)

with pytest.raises(ValueError, match=MATCH_FAIL_VALIDATION):
simple_definition.validate(simple_df)
assert (
"Please refer to https://files.ece.iiasa.ac.at/data/data-template.xlsx"
" for the list of allowed units." in caplog.text
)


def test_validation_fails_region(simple_definition, simple_df):
def test_validation_fails_region(simple_definition, simple_df, caplog):
"""Changing a region name raises"""
simple_df.rename(region={"World": "foo"}, inplace=True)

with pytest.raises(ValueError, match=MATCH_FAIL_VALIDATION):
simple_definition.validate(simple_df)
assert (
"Please refer to https://files.ece.iiasa.ac.at/data/data-template.xlsx"
" for the list of allowed regions." in caplog.text
)


def test_validation_fails_region_as_int(simple_definition, simple_df):
Expand Down

0 comments on commit 1e9faca

Please sign in to comment.