Skip to content

Commit

Permalink
Added netcdf (#273)
Browse files Browse the repository at this point in the history
* netcdf

* fixed comments

* one file

* added unit tests

* document

* added example

* edit readme

* lowercase

* added additional logic:

* changing how output works, adding tests for output (#274)

* added backticks

* missed this comment

---------

Co-authored-by: Kyle Shores <[email protected]>
  • Loading branch information
montythind and K20shores authored Nov 12, 2024
1 parent bdafc56 commit 825c560
Show file tree
Hide file tree
Showing 7 changed files with 331 additions and 31 deletions.
32 changes: 28 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,16 +32,40 @@ Run an example. Notice that the output, in csv format, is printed to the termina
music_box -e Chapman
```

You can also run your own configuration
Output can be saved to a file in csv file when no `--output-format` is passed

```
music_box -c my_config.json
music_box -e Chapman -o output.csv
```

Output can be saved to a file
Output can be saved as a csv file when `--output-format csv` is passed

```
music_box -e Chapman -o output.csv
music_box --output-format csv -e Chapman -o output.csv
```

Output can be saved as a netcdf file when `--output-format netcdf` is passed

```
music_box --output-format netcdf -e Chapman -o output.nc
```

When `--output-format` is csv and no output path is given, output is saved to a timestamped csv file named `music_box_<YYYYMMDD_HHMMSS>.csv` in the current directory

```
music_box --output-format csv -e Chapman
```

When `--output-format` is netcdf and no output path is given, output is saved to a timestamped netcdf file named `music_box_<YYYYMMDD_HHMMSS>.nc` in the current directory

```
music_box --output-format netcdf -e Chapman
```

You can also run your own configuration

```
music_box -c my_config.json
```

And, if you have gnuplot installed, some basic plots can be made to show some resulting concentrations
Expand Down
1 change: 1 addition & 0 deletions src/acom_music_box/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@
from .evolving_conditions import EvolvingConditions
from .music_box import MusicBox
from .examples import Examples
from .data_output import DataOutput
156 changes: 156 additions & 0 deletions src/acom_music_box/data_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import os
import datetime
import logging

logger = logging.getLogger(__name__)

class DataOutput:
    """
    Write a result DataFrame to the terminal, a CSV file, or a NetCDF file.

    For terminal and CSV output the column names get their units appended
    (e.g. ``ENV.temperature`` becomes ``ENV.temperature.K``). For NetCDF output
    the units are stored as a ``units`` attribute on each variable instead.
    Output paths are validated and missing parent directories are created.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame containing the data to output.
    args : argparse.Namespace
        Arguments specifying output path, format, and additional options.

    Attributes
    ----------
    df : pandas.DataFrame
        The DataFrame to be output. After units have been appended this is a
        renamed copy; the DataFrame passed in by the caller is never modified.
    args : argparse.Namespace
        Command-line arguments or configurations specifying output options.
        ``args.output`` may be filled in with a generated path.
    unit_mapping : dict
        A dictionary mapping specific columns to their respective units.

    Examples
    --------
    >>> import pandas as pd
    >>> from argparse import Namespace
    >>> df = pd.DataFrame({
    ...     'ENV.temperature': [290, 295, 300],
    ...     'ENV.pressure': [101325, 100000, 98500],
    ...     'ENV.number_density_air': [102, 5096, 850960],
    ...     'time': [0, 1, 2]
    ... })
    >>> args = Namespace(output='output.nc', output_format='netcdf')
    >>> data_output = DataOutput(df, args)
    >>> data_output.output()
    """

    def __init__(self, df, args):
        """
        Initialize the DataOutput class with a DataFrame and configuration arguments.

        Parameters
        ----------
        df : pandas.DataFrame
            The DataFrame containing the data to be output.
        args : argparse.Namespace
            Arguments specifying the output configuration, such as file path and format.

        Notes
        -----
        The `args` argument should have the following attributes:

        - output : str or None
            The path to save the output file. May be a file path or an
            existing directory.
        - output_format : str, optional
            One of 'terminal', 'csv' or 'netcdf'. ``None`` (or a missing
            attribute) is treated as 'terminal'.
        """
        self.df = df
        self.args = args
        self.unit_mapping = {
            'ENV.temperature': 'K',
            'ENV.pressure': 'Pa',
            # NOTE(review): 'kg -m3' looks like a typo (perhaps 'kg m-3'); it is
            # kept as-is because it ends up in emitted column names and
            # NetCDF attributes -- confirm the intended unit before changing.
            'ENV.number_density_air': 'kg -m3',
            'time': 's'
        }

    def _get_default_filename(self):
        """Generate a default filename based on the current datetime and output format."""
        now = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        # Anything that is not explicitly csv gets the NetCDF extension.
        output_format = getattr(self.args, 'output_format', None)
        extension = 'csv' if output_format == 'csv' else 'nc'
        return f"music_box_{now}.{extension}"

    def _ensure_output_path(self):
        """Ensure the output path is valid and create directories if needed."""
        if not self.args.output:
            self.args.output = self._get_default_filename()

        # An existing directory was given: place a generated file inside it.
        if os.path.isdir(self.args.output):
            self.args.output = os.path.join(
                self.args.output, self._get_default_filename())

        dir_path = os.path.dirname(self.args.output)
        if dir_path and not os.path.exists(dir_path):
            os.makedirs(dir_path, exist_ok=True)
            logger.info("Created directory: %s", dir_path)

    def _column_with_units(self, col):
        """Return the column name `col` with its unit suffix, if it has one."""
        if col in self.unit_mapping:
            return f"{col}.{self.unit_mapping[col]}"
        if isinstance(col, str) and col.startswith('CONC.'):
            return f"{col}.mol m-3"
        return col

    def _append_units_to_columns(self):
        """
        Append units to DataFrame column names based on unit mapping.

        ``self.df`` is rebound to a renamed copy so that the DataFrame object
        supplied by the caller keeps its original column names (the caller
        may still need them, e.g. for plotting by species name).
        """
        self.df = self.df.rename(columns=self._column_with_units)

    def _convert_to_netcdf(self):
        """Convert DataFrame to xarray Dataset and save as NetCDF with attributes."""
        ds = self.df.set_index(['time']).to_xarray()
        for var in ds.data_vars:
            if isinstance(var, str) and var.startswith('CONC.'):
                ds[var].attrs = {'units': 'mol m-3'}

        # Single source of truth for the fixed units; variables that are not
        # present in this particular result are skipped instead of raising.
        for name, unit in self.unit_mapping.items():
            if name in ds.variables:
                ds[name].attrs = {'units': unit}

        ds.to_netcdf(self.args.output)

    def _output_csv(self):
        """Handles CSV output: write to ``args.output`` if set, else print."""
        self._append_units_to_columns()
        if self.args.output:
            self._ensure_output_path()
            self.df.to_csv(self.args.output, index=False)
            logger.info("CSV output written to: %s", self.args.output)
        else:
            print(self.df.to_csv(index=False))

    def _output_netcdf(self):
        """
        Handles NetCDF output.

        NetCDF cannot be printed to the terminal, so a file is always written;
        `_ensure_output_path` generates a default name when none was given.
        """
        self._ensure_output_path()
        self._convert_to_netcdf()
        logger.info("NetCDF output written to: %s", self.args.output)

    def _output_terminal(self):
        """Handles output to terminal."""
        self._append_units_to_columns()
        print(self.df.to_csv(index=False))

    def output(self):
        """
        Main method to handle output based on the provided arguments.

        Raises
        ------
        ValueError
            If ``args.output_format`` is not None, 'terminal', 'csv' or 'netcdf'.
        """
        output_format = getattr(self.args, 'output_format', None)

        if output_format is None or output_format == 'terminal':
            self._output_terminal()
        elif output_format == 'csv':
            # Without a path, csv goes to a timestamped file rather than stdout.
            if self.args.output is None:
                self.args.output = self._get_default_filename()
            self._output_csv()
        elif output_format == 'netcdf':
            self._output_netcdf()
        else:
            error = f"Unsupported output format: {output_format}"
            logger.error(error)
            raise ValueError(error)

17 changes: 12 additions & 5 deletions src/acom_music_box/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import tempfile
import matplotlib.pyplot as plt
import mplcursors
from acom_music_box import MusicBox, Examples, __version__
from acom_music_box import MusicBox, Examples, __version__, DataOutput


def format_examples_help(examples):
Expand Down Expand Up @@ -36,6 +36,12 @@ def parse_arguments():
type=str,
help='Path to save the output file, including the file name. If not provided, result will be printed to the console.'
)
parser.add_argument(
'--output-format',
choices=['csv', 'netcdf', 'terminal'],
default='terminal',
help="Specify output format: 'terminal' (default), 'csv', or 'netcdf'."
)
parser.add_argument(
'-v', '--verbose',
action='count',
Expand Down Expand Up @@ -154,7 +160,6 @@ def on_add(sel):

plt.show()


def main():
start = datetime.datetime.now()

Expand All @@ -176,6 +181,7 @@ def main():
musicBoxConfigFile = args.config

musicBoxOutputPath = args.output

plot_species_list = args.plot.split(',') if args.plot else None

if not musicBoxConfigFile:
Expand All @@ -189,10 +195,11 @@ def main():
logger.debug(f"Configuration file = {musicBoxConfigFile}")
myBox.loadJson(musicBoxConfigFile)

result = myBox.solve(musicBoxOutputPath)
result = myBox.solve(callback=None)

if musicBoxOutputPath is None:
print(result.to_csv(index=False))
# Create an instance of DataOutput
dataOutput = DataOutput(result, args)
dataOutput.output()

if plot_species_list:
if args.plot_tool == 'gnuplot':
Expand Down
26 changes: 4 additions & 22 deletions src/acom_music_box/music_box.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def add_evolving_condition(self, time_point, conditions):
time=[time_point], conditions=[conditions])
self.evolvingConditions.append(evolving_condition)

def solve(self, output_path=None, callback=None):
def solve(self, callback=None):
"""
Solves the box model simulation and optionally writes the output to a file.
Expand All @@ -68,8 +68,8 @@ def solve(self, output_path=None, callback=None):
the specified file.
Args:
output_path (str, optional): The path to the file where the output will be written. If None, no output file is created. Defaults to None.
callback (function, optional): A callback function that is called after each time step. Defaults to None. The callback will take the most recent results, the current time, conditions, and the total simulation time as arguments.
callback (function, optional): A callback function that is called after each time step. Defaults to None.
The callback will take the most recent results, the current time, conditions, and the total simulation time as arguments.
Returns:
list: A 2D list where each inner list represents the results of the simulation
Expand Down Expand Up @@ -194,25 +194,7 @@ def solve(self, output_path=None, callback=None):
# increments time
curr_time += time_step
pbar.update(time_step)
df = pd.DataFrame(output_array[1:], columns=output_array[0])
# outputs to file if output is present
if output_path is not None:

# Check if the output_path is a full path or just a file name
if os.path.dirname(output_path) == '':
# If output_path is just a filename, use the current directory
output_path = os.path.join(os.getcwd(), output_path)
elif not os.path.basename(output_path):
raise ValueError(f"Invalid output path: '{output_path}' does not contain a filename.")

# Ensure the directory exists
dir_path = os.path.dirname(output_path)
if dir_path and not os.path.exists(dir_path):
os.makedirs(dir_path, exist_ok=True)

df.to_csv(output_path, index=False)

return df
return pd.DataFrame(output_array[1:], columns=output_array[0])

def loadJson(self, path_to_json):
"""
Expand Down
50 changes: 50 additions & 0 deletions tests/integration/test_executable_data_output.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import subprocess
import os
import glob
import pytest
import tempfile

@pytest.fixture
def temp_dir():
    """Provide the path of a scratch directory that is removed after the test."""
    scratch = tempfile.TemporaryDirectory()
    try:
        yield scratch.name
    finally:
        scratch.cleanup()

def test_print_results_to_terminal(temp_dir):
    """Running an example without output options writes something to stdout."""
    command = ['music_box', '-e', 'Analytical']
    completed = subprocess.run(command, capture_output=True, text=True, cwd=temp_dir)
    printed = completed.stdout
    assert len(printed) > 0

def test_create_netcdf_with_timestamp(temp_dir):
    """NetCDF format with no output path produces a music_box_*.nc file in the cwd."""
    command = ['music_box', '-e', 'Analytical', '--output-format', 'netcdf']
    subprocess.run(command, cwd=temp_dir)
    pattern = os.path.join(temp_dir, "music_box_*.nc")
    matches = glob.glob(pattern)
    assert matches

def test_create_csv_with_timestamp(temp_dir):
    """CSV format with no output path produces a music_box_*.csv file in the cwd."""
    command = ['music_box', '-e', 'Analytical', '--output-format', 'csv']
    subprocess.run(command, cwd=temp_dir)
    pattern = os.path.join(temp_dir, "music_box_*.csv")
    matches = glob.glob(pattern)
    assert matches

def test_create_named_csv(temp_dir):
    """CSV format with an explicit file name writes exactly that file."""
    command = ['music_box', '-e', 'Analytical', '--output-format', 'csv', '-o', 'out.csv']
    subprocess.run(command, cwd=temp_dir)
    expected_file = os.path.join(temp_dir, "out.csv")
    assert os.path.exists(expected_file)

def test_create_named_netcdf(temp_dir):
    """NetCDF format with an explicit file name writes exactly that file."""
    command = ['music_box', '-e', 'Analytical', '--output-format', 'netcdf', '-o', 'out.nc']
    subprocess.run(command, cwd=temp_dir)
    expected_file = os.path.join(temp_dir, "out.nc")
    assert os.path.exists(expected_file)

def test_create_directory_and_named_netcdf(temp_dir):
    """A named NetCDF file can be written inside a sub-directory."""
    results_dir = os.path.join(temp_dir, "results")
    os.makedirs(results_dir, exist_ok=True)
    command = ['music_box', '-e', 'Analytical', '--output-format', 'netcdf', '-o', 'results/out.nc']
    subprocess.run(command, cwd=temp_dir)
    expected_file = os.path.join(temp_dir, "results/out.nc")
    assert os.path.exists(expected_file)

def test_create_directory_and_named_csv(temp_dir):
    """A named CSV file can be written inside a sub-directory."""
    results_dir = os.path.join(temp_dir, "results")
    os.makedirs(results_dir, exist_ok=True)
    command = ['music_box', '-e', 'Analytical', '--output-format', 'csv', '-o', 'results/out.csv']
    subprocess.run(command, cwd=temp_dir)
    expected_file = os.path.join(temp_dir, "results/out.csv")
    assert os.path.exists(expected_file)

def test_create_directory_and_timestamped_csv(temp_dir):
    """Giving an existing directory as output yields a timestamped CSV inside it."""
    results_dir = os.path.join(temp_dir, "results")
    os.makedirs(results_dir, exist_ok=True)
    command = ['music_box', '-e', 'Analytical', '--output-format', 'csv', '-o', 'results/']
    subprocess.run(command, cwd=temp_dir)
    pattern = os.path.join(temp_dir, "results/music_box_*.csv")
    matches = glob.glob(pattern)
    assert matches

def test_create_directory_and_timestamped_netcdf(temp_dir):
    """Giving an existing directory as output yields a timestamped NetCDF file inside it."""
    results_dir = os.path.join(temp_dir, "results")
    os.makedirs(results_dir, exist_ok=True)
    command = ['music_box', '-e', 'Analytical', '--output-format', 'netcdf', '-o', 'results/']
    subprocess.run(command, cwd=temp_dir)
    pattern = os.path.join(temp_dir, "results/music_box_*.nc")
    matches = glob.glob(pattern)
    assert matches
Loading

0 comments on commit 825c560

Please sign in to comment.