diff --git a/src/ascat/file_handling.py b/src/ascat/file_handling.py
index 609244fb..19134457 100644
--- a/src/ascat/file_handling.py
+++ b/src/ascat/file_handling.py
@@ -439,22 +439,10 @@ def read(self, *fmt_args, fmt_kwargs=None, cls_kwargs=None):
         fn_read_fmt, sf_read_fmt, _, _ = self._fmt(*fmt_args, **fmt_kwargs)
         search_filename = self.ft.build_filename(fn_read_fmt, sf_read_fmt)
         filename = glob.glob(search_filename)
+        self._open(filename)
 
         data = None
-        if len(filename) == 0:
-            msg = f"File not found: {search_filename}"
-            if self.err:
-                raise IOError(msg)
-            else:
-                warnings.warn(msg)
-        elif len(filename) > 1:
-            msg = "Multiple files found"
-            if self.err:
-                raise RuntimeError(msg)
-            else:
-                warnings.warn(msg)
-        else:
-            data = self.read_file(filename[0], cls_kwargs)
+        data = self.fid.read(**cls_kwargs)
 
         return data
 
@@ -824,8 +812,200 @@ def read_period(
         return data
 
 
+class Filenames:
+    """
+    A class to handle operations on multiple filenames.
+
+    This class provides methods for reading from, writing to, and
+    merging data from multiple files. The format-specific work is done
+    by ``_read``, ``_write`` and ``_merge``, which subclasses implement.
+    """
+
+    def __init__(self, filenames):
+        """
+        Initialize Filenames.
+
+        Parameters
+        ----------
+        filenames : str, Path, list or tuple
+            File path(s) to be handled.
+
+        Raises
+        ------
+        ValueError
+            If filenames is not a str, Path, list or tuple.
+        """
+        if isinstance(filenames, (str, Path)):
+            filenames = [filenames]
+        elif not isinstance(filenames, (list, tuple)):
+            raise ValueError("filenames must be a string or list of strings.")
+
+        self.filenames = [Path(f) for f in filenames]
+
+    def _read(self, filename, **kwargs):
+        """
+        Read data from a single file.
+
+        This method should be implemented by subclasses.
+
+        Parameters
+        ----------
+        filename : Path
+            The file to read from.
+        **kwargs : dict
+            Additional keyword arguments for reading.
+
+        Raises
+        ------
+        NotImplementedError
+            If the method is not implemented in a subclass.
+        """
+        raise NotImplementedError
+
+    def _merge(self, data, **kwargs):
+        """
+        Merge multiple data objects.
+
+        This method should be implemented by subclasses.
+
+        Parameters
+        ----------
+        data : list
+            List of data objects to merge.
+        **kwargs : dict
+            Additional keyword arguments for merging.
+
+        Raises
+        ------
+        NotImplementedError
+            If the method is not implemented in a subclass.
+        """
+        raise NotImplementedError
+
+    def _write(self, data, filename, **kwargs):
+        """
+        Write data to a single file.
+
+        This method should be implemented by subclasses.
+
+        Parameters
+        ----------
+        data : object
+            The data to write.
+        filename : Path
+            The file to write to.
+        **kwargs : dict
+            Additional keyword arguments for writing.
+
+        Raises
+        ------
+        NotImplementedError
+            If the method is not implemented in a subclass.
+        """
+        raise NotImplementedError
+
+    def write(self, data, **kwargs):
+        """
+        Write data to file.
+
+        Only a single target file is supported: the provided data is
+        written to the one filename this object holds.
+
+        Parameters
+        ----------
+        data : object
+            The data to write.
+        **kwargs : dict
+            Additional keyword arguments passed on to ``_write``.
+
+        Raises
+        ------
+        NotImplementedError
+            If the number of filenames is not exactly one.
+
+        TODO: Add support for writing to multiple files by passing a
+        list of data and checking if the number of data objects matches
+        the number of filenames.
+        """
+        n_filenames = len(self.filenames)
+        if n_filenames != 1:
+            # Fail loudly instead of silently dropping the data.
+            raise NotImplementedError(
+                f"Writing to {n_filenames} files is not supported.")
+
+        filename = self.filenames[0]
+        filename.parent.mkdir(parents=True, exist_ok=True)
+        self._write(data, filename, **kwargs)
+
+    def read(self, **kwargs):
+        """
+        Read all data from files.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``_read``.
+
+        Returns
+        -------
+        object
+            Merged data from all files, or None if there are no files.
+        """
+        return self.merge(list(self.iter_read(**kwargs)))
+
+    def iter_read(self, **kwargs):
+        """
+        Iterate over all files and yield data.
+
+        Parameters
+        ----------
+        **kwargs : dict
+            Additional keyword arguments passed on to ``_read``.
+
+        Yields
+        ------
+        object
+            Data read from each file.
+        """
+        for filename in self.filenames:
+            yield self._read(filename, **kwargs)
+
+    def merge(self, data, **kwargs):
+        """
+        Merge data from multiple data objects.
+
+        Parameters
+        ----------
+        data : list
+            List of data objects.
+        **kwargs : dict
+            Additional keyword arguments passed on to ``_merge``.
+
+        Returns
+        -------
+        object
+            Merged data, the single element itself if the list holds
+            only one, or None if the input list is empty.
+        """
+        n_data = len(data)
+        if n_data == 0:
+            return None
+        if n_data == 1:
+            # Nothing to merge, hand back the only element unchanged.
+            return data[0]
+
+        return self._merge(data, **kwargs)
+
+    def close(self):
+        """
+        Close file(s).
+
+        This method can be overridden in subclasses if necessary.
+        """
+        pass
+
 
-class Csv:
+class CsvFile(Filenames):
     """
     Read and write single CSV file.
     """
@@ -841,8 +1021,8 @@ def __init__(self, filename, mode="r"):
         mode : str, optional
             File opening mode.
         """
-        self.filename = filename
         self.mode = mode
+        super().__init__(filename)
 
     def header2dtype(self, header):
         """
@@ -875,16 +1055,16 @@ def header2dtype(self, header):
 
         return np.dtype(dtype_list)
 
-    def read(self):
+    def _read(self, filename):
         """
         Read data from CSV file.
 
         Parameters
         ----------
-        timestamp : datetime
-            Time stamp.
+        filename : str
+            Filename.
         """
-        with open(self.filename) as fid:
+        with open(filename) as fid:
             header = fid.readline()
             dtype = self.header2dtype(header)
             data = np.loadtxt(fid, dtype)
@@ -917,7 +1097,7 @@ def read_period(self, dt_start, dt_end):
 
         return data
 
-    def write(self, data):
+    def _write(self, data, filename):
         """
         Write data to CSV file.
 
@@ -927,10 +1107,10 @@ def write(self, data):
             Data.
         """
         header = data.dtype.__repr__()
-        np.savetxt(self.filename, data, fmt="%s", header=header)
+        np.savetxt(filename, data, fmt="%s", header=header)
 
     @staticmethod
-    def merge(data):
+    def _merge(data):
         """
         Merge data.
 
@@ -964,7 +1144,7 @@ def __init__(self, root_path):
         fn_templ = "prefix_{date}_{now}_postfix.csv"
         sf_templ = {"Y": "{year}", "M": "{month}"}
 
-        super().__init__(root_path, Csv, fn_templ, sf_templ=sf_templ)
+        super().__init__(root_path, CsvFile, fn_templ, sf_templ=sf_templ)
 
     def _fmt(self, timestamp):
         """
diff --git a/tests/test_file_handling.py b/tests/test_file_handling.py
index 94795326..236da38f 100644
--- a/tests/test_file_handling.py
+++ b/tests/test_file_handling.py
@@ -40,7 +40,7 @@
 from ascat.file_handling import FilenameTemplate
 from ascat.file_handling import FileSearch
 from ascat.file_handling import ChronFiles
-from ascat.file_handling import Csv
+from ascat.file_handling import CsvFile
 from ascat.file_handling import CsvFiles
 
 
@@ -57,7 +57,7 @@ def generate_test_data():
 
             filename = tmpdir / "temperature" / tile / f"202201{num}_ascat.csv"
 
-            csv_file = Csv(filename, mode="w")
+            csv_file = CsvFile(filename, mode="w")
             file_dates = [
                 datetime.strptime(f"202201{num}", "%Y%m%d") +
                 timedelta(seconds=(3600 * i + seconds))
@@ -99,7 +99,7 @@ def generate_test_data():
                     "%H%M%S"), file_dates[-2].strftime("%H%M%S")
                 filename = folder / Path(f"ascat_{d1}_{t1}-{d2}_{t2}.csv")
 
-                csv_file = Csv(filename, mode="w")
+                csv_file = CsvFile(filename, mode="w")
                 file_precips = random.choices(
                     range(0, 5), k=len(file_dates) - 1)
                 dtype = np.dtype([("date", "datetime64[s]"),
@@ -435,7 +435,7 @@ def setUp(self):
         """
         Setup test.
         """
-        cls = Csv
+        cls = CsvFile
         fn_templ = "{date}_ascat.csv"
         sf_templ = {"variables": "{variable}", "tiles": "{tile}"}
         cls_kwargs = None