diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5c04f27..9aa24c7 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -40,6 +40,10 @@ jobs: experimental: [ false ] os: [ ubuntu-22.04 ] steps: + - name: Install system packages + run: | + sudo apt update + sudo apt install libudunits2-dev - uses: actions/checkout@v3 with: lfs: 'true' diff --git a/.gitignore b/.gitignore index 2dc53ca..d42c243 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,5 @@ cython_debug/ # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. .idea/ + +.vscode/ \ No newline at end of file diff --git a/README.md b/README.md index 76b583e..c90c5fe 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,10 @@ The MSC-W database contains the EBAS database for 1990-2021 and the EEA_Airquip contain already hourly data if enough hours have been measured. Therefore, `resolution` is a required parameter. +### harp +Reader for NetCDF files that follow the [HARP](http://stcorp.github.io/harp/doc/html/conventions/) +conventions. + ### nilupmfebas: EBAS format (Nasa-Ames) Reader for random EBAS data in NASA-AMES format. This reader is tested only with PMF data provided by NILU, but should in principle able to read any random text file in EBAS NASA-AMES. 
@@ -109,8 +113,40 @@ with pyaro.open_timeseries( data.altitudes # values data.values +``` + +### harpreader +```python +import pyaro + +TEST_URL = "/lustre/storeB/project/aerocom/aerocom1/AEROCOM_OBSDATA/CNEMC/aggregated/sinca-surface-157-999999-001.nc" +with pyaro.open_timeseries( + 'harp', TEST_URL +) as ts: + data = ts.data("CO_volume_mixing_ratio") + data.units # ppm + # stations + data.stations + # start_times + data.start_times + # stop_times + data.end_times + # latitudes + data.latitudes + # longitudes + data.longitudes + # altitudes + data.altitudes + # values + data.values ``` + + +### geocoder_reverse_natural_earth +geocoder_reverse_natural_earth is a small helper to identify country codes for obs networks that don't mention the +country code of a station in their location data. + ### nilupmfebas ```python import pyaro diff --git a/pyproject.toml_future b/pyproject.toml_future index cfc33c1..553220e 100644 --- a/pyproject.toml_future +++ b/pyproject.toml_future @@ -29,6 +29,8 @@ dependencies = [ "shapely", "rtree", "tqdm", + "xarray", + "cfunits" ] [tool.setuptools] diff --git a/setup.cfg b/setup.cfg index 7534d7b..3ada66e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -27,10 +27,12 @@ install_requires = requests tqdm numpy + xarray + cfunits package_dir = =src -packages = pyaro_readers.aeronetsunreader, pyaro_readers.aeronetsdareader, pyaro_readers.ascii2netcdf, pyaro_readers.nilupmfebas +packages = pyaro_readers.aeronetsunreader, pyaro_readers.aeronetsdareader, pyaro_readers.ascii2netcdf, pyaro_readers.harpreader, pyaro_readers.nilupmfebas test_require = tox:tox [options.package_data] @@ -41,7 +43,7 @@ pyaro.timeseries = aeronetsdareader = pyaro_readers.aeronetsdareader:AeronetSdaTimeseriesEngine ascii2netcdf = pyaro_readers.ascii2netcdf:Ascii2NetcdfTimeseriesEngine nilupmfebas = pyaro_readers.nilupmfebas:EbasPmfTimeseriesEngine - + harp = pyaro_readers.harpreader:AeronetHARPEngine [tox:tox] min_version = 4.0 diff --git 
a/src/pyaro_readers/harpreader/__init__.py b/src/pyaro_readers/harpreader/__init__.py
new file mode 100644
index 0000000..58dae40
--- /dev/null
+++ b/src/pyaro_readers/harpreader/__init__.py
@@ -0,0 +1,4 @@
+from .harpreader import (
+    AeronetHARPEngine,
+    AeronetHARPReader,
+)
diff --git a/src/pyaro_readers/harpreader/harpreader.py b/src/pyaro_readers/harpreader/harpreader.py
new file mode 100644
index 0000000..98c9f36
--- /dev/null
+++ b/src/pyaro_readers/harpreader/harpreader.py
@@ -0,0 +1,201 @@
+import glob
+import inspect
+from pyaro.timeseries import (
+    AutoFilterReaderEngine,
+    Station,
+    Data,
+    NpStructuredData,
+    Flag,
+)
+import logging
+import os
+import xarray as xr
+import numpy as np
+from collections import namedtuple
+import re
+import cfunits
+import pyaro
+
+logger = logging.getLogger(__name__)
+
+
+class HARPReaderException(Exception):
+    """Raised when the path given to the HARP reader is not an existing file."""
+
+
+class AeronetHARPReader(AutoFilterReaderEngine.AutoFilterReader):
+    """
+    Reader for netCDF files which follow the HARP convention.
+    """
+
+    def __init__(self, file: str):
+        """Checks the input file and caches its variable-to-unit mapping.
+
+        Parameters:
+        -----------
+        file : str
+            Path to a netCDF file whose global `Conventions` attribute is
+            "HARP-1.0".
+
+        Raises:
+        -------
+        HARPReaderException
+            If `file` is not an existing regular file.
+        ValueError
+            If the `Conventions` attribute is anything other than "HARP-1.0".
+
+        """
+        self._filters = []
+        if not os.path.isfile(file):
+            raise HARPReaderException(f"No such file: {file}")
+        self._file = file
+
+        with xr.open_dataset(self._file) as harp:
+            if harp.attrs.get("Conventions", None) != "HARP-1.0":
+                raise ValueError("File is not a HARP file.")
+
+        self._variables = self._read_file_variables()
+
+    def _unfiltered_stations(self) -> dict[str, Station]:
+        """Returns the stations known to this reader.
+
+        Returns:
+        --------
+        dict[str, Station] :
+            Always empty for now, because no station metadata is read yet.
+
+        """
+        # TODO: Build Station objects from the file. Until then an empty dict
+        # is returned so that the annotated return type is honoured.
+        return {}
+
+    def _read_file_variables(self) -> dict[str, cfunits.Units]:
+        """Returns a mapping of variable name to unit for the dataset.
+
+        Returns:
+        --------
+        dict[str, cfunits.Units] :
+            A dictionary mapping variable name to its corresponding unit.
+
+        Raises:
+        -------
+        KeyError
+            If a data variable has no `units` attribute.
+
+        """
+        # NOTE(review): decode_cf=False presumably keeps the raw `units`
+        # attribute on every variable (e.g. the time variables) - confirm.
+        with xr.open_dataset(self._file, decode_cf=False) as d:
+            return {
+                vname: cfunits.Units(var.attrs["units"])
+                for vname, var in d.data_vars.items()
+            }
+
+    def _unfiltered_data(self, varname: str) -> NpStructuredData:
+        """Returns unfiltered data for a variable.
+
+        Parameters:
+        -----------
+        varname : str
+            The variable name for which to return the data.
+
+        Returns:
+        --------
+        NpStructuredData
+            The data.
+
+        """
+        units = self._variables[varname]
+        data = NpStructuredData(varname, units)
+
+        # NOTE(review): __init__ currently only accepts regular files, so the
+        # directory branch cannot be reached through the constructor.
+        if os.path.isdir(self._file):
+            pattern = os.path.join(self._file, "*.nc")
+        else:
+            pattern = self._file
+
+        for f in glob.glob(pattern):
+            self._get_data_from_single_file(f, varname, data)
+
+        return data
+
+    def _get_data_from_single_file(
+        self, file: str, varname: str, data: NpStructuredData
+    ) -> None:
+        """Loads data for a variable from a single file.
+
+        Parameters:
+        -----------
+        file : str
+            The file path.
+        varname : str
+            The variable name.
+        data : NpStructuredData
+            Data instance to which the data will be appended to in-place.
+
+        """
+        # The context manager closes the file handle again; every array is
+        # converted to numpy before the block is left.
+        with xr.open_dataset(file) as dt:
+            values = dt[varname].to_numpy()
+            values_length = len(values)
+
+            start_time = np.asarray(dt["datetime_start"])
+            stop_time = np.asarray(dt["datetime_stop"])
+            # NOTE(review): assumes one location per file, i.e. latitude,
+            # longitude and altitude broadcast to the number of values.
+            lat = np.full(values_length, dt["latitude"].to_numpy())
+            long = np.full(values_length, dt["longitude"].to_numpy())
+            altitude = np.full(values_length, dt["altitude"].to_numpy())
+
+        # TODO: No station identifier is read yet.
+        station = np.full(values_length, np.nan)
+        # TODO: Currently assuming that all observations are valid.
+        flags = np.full(values_length, Flag.VALID)
+
+        data.append(
+            value=values,
+            station=station,
+            latitude=lat,
+            longitude=long,
+            altitude=altitude,
+            start_time=start_time,
+            end_time=stop_time,
+            flag=flags,
+            standard_deviation=np.full(values_length, np.nan),
+        )
+
+    def _unfiltered_variables(self) -> list[str]:
+        """Returns a list of the variable names.
+
+        Returns:
+        list[str]
+            The list of variable names.
+        """
+        return list(self._variables.keys())
+
+    def close(self):
+        """Does nothing; files are only held open while they are being read."""
+        pass
+
+
+class AeronetHARPEngine(AutoFilterReaderEngine.AutoFilterEngine):
+    """Engine which opens netCDF files following the HARP convention."""
+
+    def reader_class(self):
+        """Returns the reader class created by this engine."""
+        return AeronetHARPReader
+
+    def open(self, filename: str, *args, **kwargs) -> AeronetHARPReader:
+        """Opens `filename` with the reader class, forwarding all arguments."""
+        return self.reader_class()(filename, *args, **kwargs)
+
+    def description(self):
+        """Returns the docstring of this engine."""
+        # inspect has no `doc` function; getdoc is the documented accessor.
+        return inspect.getdoc(self)
+
+    def url(self):
+        """Returns the URL of the project providing this engine."""
+        return "https://github.com/metno/pyaro-readers"
diff --git a/tests/test_HARPReader.py b/tests/test_HARPReader.py
new file mode 100644
index 0000000..6dae49d
--- /dev/null
+++ b/tests/test_HARPReader.py
@@ -0,0 +1,18 @@
+import unittest
+import pyaro
+import pyaro.timeseries
+import cfunits
+
+
+class TestHARPReader(unittest.TestCase):
+    engine = "harp"
+
+    def test_1read(self):
+        with pyaro.open_timeseries(
+            self.engine,
+            "tests/testdata/sinca-surface-157-999999-001.nc",
+        ) as ts:
+            data = ts.data("CO_volume_mixing_ratio")
+
+        self.assertGreater(len(data), 10000)
+        self.assertEqual(data.units, cfunits.Units("ppm"))
diff --git a/tests/testdata/sinca-surface-157-999999-001.nc b/tests/testdata/sinca-surface-157-999999-001.nc
new file mode 100644
index 0000000..3eddcc4
--- /dev/null
+++ b/tests/testdata/sinca-surface-157-999999-001.nc
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae677cff326e576e67ec0fc83a959d0c06071ff96ec09e69bed5c565439ddd3c
+size 565757