From d2760eca82b05c70438c044bbaad965bfa094aca Mon Sep 17 00:00:00 2001
From: Daniel Huppmann <>
Date: Wed, 17 Nov 2021 20:22:40 +0100
Subject: [PATCH] Reimplement validation for subannual/datetime data (#129)

 .github/workflows/pytest.yml               | 33 ++++++++
 .github/workflows/validation.yml           |  6 +-
 {openentrance/tests => tests}/ |  6 +-
 tests/                  | 54 +++++++++++++
 tests/                     | 90 ++++++++++++++++++++++                                | 33 +++++++-
 6 files changed, 213 insertions(+), 9 deletions(-)
 create mode 100644 .github/workflows/pytest.yml
 rename {openentrance/tests => tests}/ (58%)
 create mode 100644 tests/
 create mode 100644 tests/

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
new file mode 100644
index 00000000..c42402be
--- /dev/null
+++ b/.github/workflows/pytest.yml
@@ -0,0 +1,33 @@
+# This workflow will install Python dependencies and run the tests
+# For more information see:
+name: Pytest
+  push:
+    branches: [ '**' ]
+  pull_request:
+    branches: [ '**' ]
+  tests:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python 3.9
+      uses: actions/setup-python@v1
+      with:
+        python-version: 3.9
+    - name: Install dependencies
+      run: |
+        pip install -r requirements.txt
+        pip install pytest
+    - name: Install and test package functions
+      run: |
+        pip install --editable .
+        pytest tests
diff --git a/.github/workflows/validation.yml b/.github/workflows/validation.yml
index 0d1520cb..c0c6f3d7 100644
--- a/.github/workflows/validation.yml
+++ b/.github/workflows/validation.yml
@@ -1,7 +1,7 @@
 # This workflow will install Python dependencies and validate the project
 # For more information see:
-name: Validate the project
+name: Nomenclature
@@ -10,7 +10,7 @@ on:
     branches: [ '**' ]
-  pytest:
+  validation:
     runs-on: ubuntu-latest
@@ -22,7 +22,7 @@ jobs:
         python-version: 3.9
-    - name: Install requirements
+    - name: Install dependencies
       run: pip install -r requirements.txt
     - name: Run the nomenclature project validation
diff --git a/openentrance/tests/ b/tests/
similarity index 58%
rename from openentrance/tests/
rename to tests/
index 233fc702..e1809ed8 100644
--- a/openentrance/tests/
+++ b/tests/
@@ -1,12 +1,12 @@
-import openentrance as oe
+from openentrance import iso_mapping, nuts_hierarchy
 def test_iso_mapping():
     # check that iso-mapping dictionary is not empty and has specific elements
     for name in ["GR", "GRC", "EL"]:
-        assert oe.iso_mapping[name] == "Greece"
+        assert iso_mapping[name] == "Greece"
 def test_nuts_hierarchy():
     # check that nuts-hierarchy is not empty and has specific elements
-    assert oe.nuts_hierarchy["Belgium"]["BE2"]["BE24"] == ["BE241", "BE242"]
+    assert nuts_hierarchy["Belgium"]["BE2"]["BE24"] == ["BE241", "BE242"]
diff --git a/tests/ b/tests/
new file mode 100644
index 00000000..3139da57
--- /dev/null
+++ b/tests/
@@ -0,0 +1,54 @@
+import nomenclature
+definition = nomenclature.DataStructureDefinition("definitions")
+def test_variables():
+    # check that variables dictionary is not empty and has specific element
+    assert "Emissions|CO2" in definition.variable
+def test_variables_fuel_types():
+    # check that exploding of <Fuel> to fuels works (including CCS subcategory)
+    obs = definition.variable["Secondary Energy|Electricity|Gas"]
+    exp = (
+        "Net electricity production from natural gas "
+        "(including methane from biomass or hydrogenation)"
+    )
+    assert obs["description"] == exp
+    obs = definition.variable["Secondary Energy|Electricity|Gas|w/ CCS"]
+    exp = (
+        "Net electricity production from natural gas (including methane "
+        "from biomass or hydrogenation) with a CO2 capture component"
+    )
+    assert obs["description"] == exp
+def test_variables_industry_types():
+    # check that exploding of <industry> to industries works
+    obs = definition.variable["Capital|iAGRI"]
+    exp = "Total capital costs spend by agriculture"
+    assert obs["description"] == exp
+def test_variables_transport_types():
+    # check that exploding of <transport> to transportation modes works
+    obs = definition.variable["Energy Service|Transportation|Freight|Rail"]
+    exp = (
+        "Provision of energy services related to freight "
+        "rail-based transportation technologies"
+    )
+    assert obs["description"] == exp
+def test_variables_product_types():
+    # check that exploding of <product> to products works
+    obs = definition.variable["Consumption|Households|pAGRI|Imported"]
+    exp = "Consumption of imported agriculture by households"
+    assert obs["description"] == exp
+def test_regions():
+    # check that regions dictionary is not empty and has specific element
+    assert "Europe" in definition.region
diff --git a/tests/ b/tests/
new file mode 100644
index 00000000..bf1571c9
--- /dev/null
+++ b/tests/
@@ -0,0 +1,90 @@
+import pandas as pd
+from pyam import IamDataFrame
+import pytest
+import sys
+from workflow import main as workflow
+TEST_DF = pd.DataFrame(
+    [
+        ["model_a", "scen_a", "Europe", "Primary Energy", "EJ/yr", 1, 6.0],
+    ],
+    columns=["model", "scenario", "region", "variable", "unit", 2005, 2010],
+df = IamDataFrame(TEST_DF)
+def validate(df):
+    try:
+        workflow(df)
+        return True
+    except ValueError as e:
+        print(e)
+        return False
+def test_validate():
+    # test simple validation
+    assert validate(df)
+def test_validate_fail():
+    # test that simple validation fails on variable and region dimension
+    assert not (validate(df.rename(variable={"Primary Energy": "foo"})))
+    assert not (validate(df.rename(region={"Europe": "foo"})))
+def _test_validate_directional():
+    # test that validation works as expected with directional data
+    assert validate(df.rename(region={"Europe": "Austria>Germany"}))
+    assert not validate(df.rename(region={"Europe": "Austria>foo"}))
+    # test that directional data with more than one `>` fails
+    assert not validate(df.rename(region={"Europe": "Austria>Italy>France"}))
+def test_validate_subannual_months():
+    # test that validation works as expected with months
+    # (and representative timeslices generally)
+    assert validate(IamDataFrame(TEST_DF, subannual="January"))
+    assert not validate(IamDataFrame(TEST_DF, subannual="foo"))
+    "subannual, status",
+    [
+        ("01-01 00:00+01:00", True),
+        ("01-01 00:00", False),
+        ("01-01 00:00+02:00", False),
+        ("01-32 00:00+01:00", False),
+    ],
+def test_validate_subannual_datetime(subannual, status):
+    # test that validation works as expected with continuous time as subannual
+    assert validate(IamDataFrame(TEST_DF, subannual=subannual)) == status
+    "rename_mapping, status",
+    [
+        ({2005: "2005-06-17 00:00+01:00", 2010: "2010-06-17 00:00+01:00"}, True),
+        ({2005: "2005-06-17 00:00+02:00", 2010: "2010-06-17 00:00+02:00"}, False),
+        ({2005: "2005-06-17 00:00", 2010: "2010-06-17 00:00"}, False),
+    ],
+def test_validate_time_entry(rename_mapping, status):
+    # test that validation works as expected with datetime-domain
+    _df = IamDataFrame(
+        IamDataFrame(TEST_DF)
+        .data.rename(columns={"year": "time"})
+        .replace(rename_mapping)
+    )
+    assert validate(_df) == status
+def test_validate_unit_entry():
+    assert not (validate(df.rename(unit={"EJ/yr": "MWh"})))
diff --git a/ b/
index 4e0c0027..f975e31c 100755
--- a/
+++ b/
@@ -5,22 +5,49 @@
 here = Path(__file__).absolute().parent
 logger = logging.getLogger(__name__)
+from datetime import datetime, timedelta
+# datetime must be in Central European Time (CET)
+EXP_TZ = "UTC+01:00"
+EXP_TIME_OFFSET = timedelta(seconds=3600)
 def main(df: pyam.IamDataFrame) -> pyam.IamDataFrame:
     """Main function for validation and processing""""Starting openENTRANCE timeseries-upload processing workflow...")
-    if "subannual" in df.dimensions:
+    if "subannual" in df.dimensions or df.time_col == "time":
         dimensions = ["region", "variable", "subannual"]
         dimensions = ["region", "variable"]
     definition = DataStructureDefinition(here / "definitions", dimensions=dimensions)
-    definition.validate(df)
+    definition.validate(df, dimensions=["region", "variable"])
+    # convert to subannual format if data provided in datetime format
     if df.time_col == "time":'Re-casting from "time" column to categorical "subannual" format')
-        df.swap_time_for_year(inplace=True)
+        df = df.swap_time_for_year(subannual=True)
+    # check that any datetime-like items in "subannual" are valid datetime and UTC+01:00
+    if "subannual" in df.dimensions:
+        _datetime = [s for s in df.subannual if s not in definition.subannual]
+        for d in _datetime:
+            try:
+                _dt = datetime.strptime(f"2020-{d}", "%Y-%m-%d %H:%M%z")
+            except ValueError:
+                try:
+                    datetime.strptime(f"2020-{d}", "%Y-%m-%d %H:%M")
+                except ValueError:
+                    raise ValueError(f"Invalid subannual timeslice: {d}")
+                raise ValueError(f"Missing timezone: {d}")
+            # casting to datetime with timezone was successful
+            if not (_dt.tzname() == EXP_TZ or _dt.utcoffset() == EXP_TIME_OFFSET):
+                raise ValueError(f"Invalid timezone: {d}")
     return df