Skip to content

Commit

Permalink
add test using a directory as input
Browse files Browse the repository at this point in the history
  • Loading branch information
Jan Griesfeller committed Apr 15, 2024
1 parent 7789847 commit 9cc35f0
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 175 deletions.
153 changes: 85 additions & 68 deletions src/pyaro_readers/nilupmfebas/EbasPmfReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
logger = logging.getLogger(__name__)

# NOTE(review): not referenced in the visible code — presumably a future switch
# for filling station country metadata; confirm before removing.
FILL_COUNTRY_FLAG = False
# Default glob pattern used to select files when a directory is given as input.
FILE_MASK = "*.nas"


class EbasPmfTimeseriesReader(AutoFilterReaderEngine.AutoFilterReader):
Expand All @@ -24,87 +25,35 @@ def __init__(
filters=[],
tqdm_desc: [str, None] = None,
ts_type: str = "daily",
filemask: str = FILE_MASK,
):
self._stations = {}
self._data = {} # var -> {data-array}
self._set_filters(filters)
self._header = []
self._opts = {"default": ReadEbasOptions()}

if Path(filename).is_dir():
realpath = Path(filename).resolve()

if Path(realpath).is_dir():
# search directory for files
pass
elif Path(filename).is_file():
self._file_dummy = self.read_file(filename)
matrix = self._file_dummy.meta["matrix"]
if self._file_dummy.meta["component"] == "":
# multicolumn file: ebas var names come from self._file_dummy.col_names_vars
unique_vars = list(set(self._file_dummy.col_names_vars))
add_meta_flag = True
for var_idx in range(len(self._file_dummy.var_defs)):
# continue if the variable is not an actual data variable (but e.g. time)
if not self._file_dummy.var_defs[var_idx].is_var:
continue
# continue if the statistcs is to be ignored
if (
self._file_dummy.var_defs[var_idx].statistics
in self._opts["default"].ignore_statistics
):
continue
var_name = f"{matrix}_{self._file_dummy.var_defs[var_idx].name}"
if add_meta_flag:
stat_name = self._file_dummy.meta["station_code"]
country = self._file_dummy.meta["station_code"][0:2]

lat = float(self._file_dummy.meta["station_latitude"])
lon = float(self._file_dummy.meta["station_longitude"])
alt = float(
self._file_dummy.meta["station_altitude"].split(" ")[0]
)

self._stations[stat_name] = Station(
{
"station": stat_name,
"longitude": lon,
"latitude": lat,
"altitude": alt,
"country": country,
"url": "",
"long_name": stat_name,
}
)
add_meta_flag = False

# we might want to put a CF compliant unit here
self._data[var_name] = NpStructuredData(
var_name, self._file_dummy.meta["unit"]
)
# now add ts after ts
for t_idx, ts in enumerate(self._file_dummy.start_meas):
self._data[var_name].append(
float(self._file_dummy.data[t_idx, var_idx]), # value
stat_name,
lat,
lon,
alt,
ts,
self._file_dummy.stop_meas[t_idx],
Flag.VALID,
np.nan,
)
# print(self._file_dummy.stop_meas[t_idx])
pass

pass
else:
# single column file
pass
files = list(realpath.glob(filemask))
bar = tqdm(desc=tqdm_desc, total=len(files))

for _ridx, file in enumerate(files):
# print(file)
bar.update(1)
self.read_file(file)
bar.close()
elif Path(realpath).is_file():
self.read_file(realpath)

else:
# filename is something else
# Error
pass

def read_file(
def read_file_basic(
self,
filename,
):
Expand All @@ -124,6 +73,74 @@ def read_file(

return data_out

def read_file(self, filename):
    """Read an EBAS NASA Ames file and merge its contents into this reader.

    Populates ``self._stations`` (one Station per file; the station metadata
    is taken from the file header) and ``self._data`` (one NpStructuredData
    per matrix/variable combination found in the file's data columns).

    :param filename: path to the .nas file to read
    """
    self._file_dummy = self.read_file_basic(filename)
    matrix = self._file_dummy.meta["matrix"]
    if self._file_dummy.meta["component"] == "":
        # multicolumn file: ebas var names come from self._file_dummy.col_names_vars
        # Station metadata is read only once (it is the same for every column).
        add_meta_flag = True
        for var_idx in range(len(self._file_dummy.var_defs)):
            # continue if the variable is not an actual data variable (but e.g. time)
            if not self._file_dummy.var_defs[var_idx].is_var:
                continue
            # continue if the statistics is to be ignored
            try:
                if (
                    self._file_dummy.var_defs[var_idx].statistics
                    in self._opts["default"].ignore_statistics
                ):
                    continue
            except KeyError:
                # NOTE(review): columns without a statistics entry are kept;
                # this implies .statistics is dict-backed (KeyError, not
                # AttributeError) — confirm against the nasa_ames reader.
                pass

            # variable key is "<matrix>#<ebas variable name>"
            var_name = f"{matrix}#{self._file_dummy.var_defs[var_idx].name}"
            if add_meta_flag:
                stat_name = self._file_dummy.meta["station_code"]
                # first two letters of the EBAS station code are the country code
                country = self._file_dummy.meta["station_code"][0:2]

                lat = float(self._file_dummy.meta["station_latitude"])
                lon = float(self._file_dummy.meta["station_longitude"])
                # altitude comes with a unit suffix (e.g. "123.0 m"); keep the number
                alt = float(self._file_dummy.meta["station_altitude"].split(" ")[0])

                self._stations[stat_name] = Station(
                    {
                        "station": stat_name,
                        "longitude": lon,
                        "latitude": lat,
                        "altitude": alt,
                        "country": country,
                        "url": "",
                        "long_name": stat_name,
                    }
                )
                add_meta_flag = False

            # we might want to put a CF compliant unit here
            # NOTE(review): re-assigning here replaces any series previously
            # stored under the same var_name (e.g. from another file read into
            # this reader) — confirm this is the intended merge behavior.
            self._data[var_name] = NpStructuredData(
                var_name, self._file_dummy.meta["unit"]
            )
            # now add ts after ts
            for t_idx, ts in enumerate(self._file_dummy.start_meas):
                self._data[var_name].append(
                    float(self._file_dummy.data[t_idx, var_idx]),  # value
                    stat_name,
                    lat,
                    lon,
                    alt,
                    ts,
                    self._file_dummy.stop_meas[t_idx],
                    Flag.VALID,
                    np.nan,
                )
    else:
        # single column file: not handled yet
        pass

def _unfiltered_data(self, varname) -> Data:
    """Return the raw data series stored for *varname*, before any filtering."""
    variable_store = self._data
    return variable_store[varname]

Expand Down
115 changes: 8 additions & 107 deletions tests/test_EbasPmfTimeSeriesReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,120 +17,21 @@ class TestAERONETTimeSeriesReader(unittest.TestCase):
"SI0008R.20171129230000.20210615130447.low_vol_sampler..pm25.32d.1d.SI01L_ARSO_pm25vz_2.SI01L_ARSO_ECOC_1.lev2.nas",
)

# Directory of EBAS PMF test files next to this test module; used to exercise
# reading a whole directory of .nas files.
testdata_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "testdata", "PMF_EBAS")

def test_0engine(self):
    """The reader engine must be registered with pyaro."""
    available_engines = pyaro.list_timeseries_engines()
    self.assertIn(self.engine, available_engines)

def test_1open(self):
def test_1open_single_file(self):
    """Reading a single .nas file yields exactly one station and >= 1 variable."""
    with pyaro.open_timeseries(self.engine, self.file, filters=[]) as ts:
        n_variables = len(ts.variables())
        n_stations = len(ts.stations())
        self.assertGreaterEqual(n_variables, 1)
        self.assertEqual(n_stations, 1)

# def external_resource_available(self, url):
# try:
# req = urllib.request.Request(TEST_URL, method="HEAD")
# resp = urllib.request.urlopen(req)
# resp.url
# return True
# except:
# return False
#
# def test_dl_data_unzipped(self):
# if not self.external_resource_available(TEST_URL):
# self.skipTest(f"external resource not available: {TEST_URL}")
# engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
# with engine.open(
# TEST_URL,
# filters=[],
# fill_country_flag=False,
# tqdm_desc="test_dl_data_unzipped",
# ) as ts:
# count = 0
# for var in ts.variables():
# count += len(ts.data(var))
# self.assertEqual(count, 49965)
# self.assertEqual(len(ts.stations()), 4)
#
# def test_dl_data_zipped(self):
# if not self.external_resource_available(TEST_ZIP_URL):
# self.skipTest(f"external resource not available: {TEST_ZIP_URL}")
# engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
# with engine.open(
# TEST_ZIP_URL,
# filters=[],
# fill_country_flag=False,
# tqdm_desc="test_dl_data_zipped",
# ) as ts:
# count = 0
# for var in ts.variables():
# count += len(ts.data(var))
# self.assertEqual(count, 49965)
# self.assertEqual(len(ts.stations()), 4)
#
# def test_aeronet_data_zipped(self):
# if not os.path.exists("/lustre"):
# self.skipTest(f"lustre not available; skipping Aeronet download on CI")
#
# if not self.external_resource_available(AERONETSUN_URL):
# self.skipTest(f"external resource not available: {AERONETSUN_URL}")
# engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
# with engine.open(
# AERONETSUN_URL,
# filters=[],
# fill_country_flag=False,
# tqdm_desc="aeronet data zipped",
# ) as ts:
# count = 0
# for var in ts.variables():
# count += len(ts.data(var))
# self.assertGreaterEqual(count, 49965)
# self.assertGreaterEqual(len(ts.stations()), 4)
#
# def test_init(self):
# engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
# self.assertEqual(engine.url(), "https://github.com/metno/pyaro-readers")
# # just see that it doesn't fail
# engine.description()
# engine.args()
# with engine.open(
# self.file, filters=[], fill_country_flag=True, tqdm_desc="test_init"
# ) as ts:
# count = 0
# for var in ts.variables():
# count += len(ts.data(var))
# self.assertEqual(count, 49965)
# self.assertEqual(len(ts.stations()), 4)
#
# def test_stationfilter(self):
# engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
# sfilter = pyaro.timeseries.filters.get("stations", exclude=["Cuiaba"])
# with engine.open(
# self.file, filters=[sfilter], tqdm_desc="test_stationfilter"
# ) as ts:
# count = 0
# for var in ts.variables():
# count += len(ts.data(var))
# self.assertEqual(count, 48775)
# self.assertEqual(len(ts.stations()), 3)
#
# def test_wrappers(self):
# engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
# new_var_name = "od500aer"
# with VariableNameChangingReader(
# engine.open(self.file, filters=[]), {"AOD_500nm": new_var_name}
# ) as ts:
# self.assertEqual(ts.data(new_var_name).variable, new_var_name)
# pass
#
# def test_variables_filter(self):
# engine = pyaro.list_timeseries_engines()["aeronetsunreader"]
# new_var_name = "od550aer"
# vfilter = pyaro.timeseries.filters.get(
# "variables", reader_to_new={"AOD_550nm": new_var_name}
# )
# with engine.open(
# self.file, filters=[vfilter], tqdm_desc="test_variables_filter"
# ) as ts:
# self.assertEqual(ts.data(new_var_name).variable, new_var_name)
def test_2open_directory(self):
    """Reading a directory aggregates stations and variables across all files."""
    with pyaro.open_timeseries(self.engine, self.testdata_dir, filters=[]) as ts:
        n_variables = len(ts.variables())
        n_stations = len(ts.stations())
        self.assertGreaterEqual(n_variables, 3)
        self.assertEqual(n_stations, 7)



if __name__ == "__main__":
Expand Down

0 comments on commit 9cc35f0

Please sign in to comment.