Skip to content

Commit

Permalink
Add docstrings for template and manifest tasks (#63)
Browse files Browse the repository at this point in the history
* Remove extra main test

* Remove hello world

* Add docstring for generate input contents task

* Add docstring for find missing conditions task

* Add docstring for update manifest contents task

* Add docstring for summarize manifest files task

* Add docstring for filter manifest files task
  • Loading branch information
jessicasyu authored Aug 23, 2024
1 parent a1e9a62 commit 05c2b5a
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 13 deletions.
2 changes: 0 additions & 2 deletions src/container_collection/__main__.py
Original file line number Diff line number Diff line change
@@ -1,2 +0,0 @@
if __name__ == "__main__":
print("hello world")
79 changes: 74 additions & 5 deletions src/container_collection/manifest/filter_manifest_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,71 @@ def filter_manifest_files(
include_filters: list[str],
exclude_filters: list[str],
) -> dict:
"""
Filter manifest file keys by incomplete extensions and given filters.
Parameters
----------
manifest
Manifest of file keys, extensions, and locations.
extensions
List of single simulation output extensions.
include_filters
List of Unix filename pattern matching filters for included file keys.
exclude_filters
List of Unix filename pattern matching filters for excluded file keys.
Returns
-------
:
Map of filtered manifest file keys to extensions and locations.
"""

complete_manifest = filter_incomplete_extensions(manifest, extensions)
manifest_files = convert_to_dictionary(complete_manifest)
selected_keys = filter_file_keys(manifest_files, include_filters, exclude_filters)
selected_keys = filter_file_keys(list(manifest_files.keys()), include_filters, exclude_filters)

return {key: manifest_files[key] for key in selected_keys}


def filter_incomplete_extensions(manifest: pd.DataFrame, extensions: list[str]) -> pd.DataFrame:
    """
    Filter manifest to keep only file keys with a complete set of extensions.

    Rows are grouped by the "KEY" column. A key is kept only if every entry in
    ``extensions`` appears in the "EXTENSION" column of its group; all rows
    belonging to a key that is missing at least one of the given extensions are
    dropped. Extensions in the manifest that are not listed in ``extensions``
    do not affect the result.

    Parameters
    ----------
    manifest
        Manifest of file keys, extensions, and locations.
    extensions
        List of single simulation output extensions.

    Returns
    -------
    :
        Filtered manifest of file keys, extensions, and locations.
    """

    # Build the required set once instead of once per group.
    required = set(extensions)

    return manifest.groupby("KEY").filter(lambda group: required.issubset(group["EXTENSION"]))


def convert_to_dictionary(manifest: pd.DataFrame) -> dict:
"""
Convert manifest dataframe to map of file key to extensions and locations.
Parameters
----------
manifest
Manifest of file keys, extensions, and locations.
Returns
-------
:
Map of file key to extensions and locations.
"""

# Return empty dictionary if there are no entries.
if manifest.empty:
return {}
Expand All @@ -40,19 +90,38 @@ def convert_to_dictionary(manifest: pd.DataFrame) -> dict:
return manifest_dict


def filter_file_keys(
    files: list[str], include_filters: list[str], exclude_filters: list[str]
) -> set:
    """
    Filter keys using include and exclude Unix filename pattern matching filters.

    A key is selected if it matches at least one include filter and does not
    match any exclude filter. Exclude filters take precedence over include
    filters. If no include filters are given, no keys are selected.

    Parameters
    ----------
    files
        List of file keys. Any iterable of keys is accepted; passing a
        dictionary iterates over its keys.
    include_filters
        List of Unix filename pattern matching filters for included file keys.
    exclude_filters
        List of Unix filename pattern matching filters for excluded file keys.

    Returns
    -------
    :
        Filtered set of file keys.
    """

    def matches_any(key: str, patterns: list[str]) -> bool:
        return any(fnmatch(key, pattern) for pattern in patterns)

    # Single pass over the keys so that one-shot iterables are handled correctly.
    return {
        key
        for key in files
        if matches_any(key, include_filters) and not matches_any(key, exclude_filters)
    }
23 changes: 23 additions & 0 deletions src/container_collection/manifest/find_missing_conditions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,29 @@ def find_missing_conditions(
seeds: list[int],
extensions: list[str],
) -> list[dict]:
"""
Find simulations missing or incomplete from list of conditions.
Parameters
----------
manifest
Manifest of all files for simulation series.
name
Name of the simulation series.
conditions
List of series condition dictionaries (must include unique condition
"key").
seeds
List of series random seeds.
extensions
List of single simulation output extensions.
Returns
-------
:
List of missing conditions, including random seed.
"""

missing_conditions = []

for condition in conditions:
Expand Down
39 changes: 39 additions & 0 deletions src/container_collection/manifest/summarize_manifest_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,45 @@
def summarize_manifest_files(
manifest: pd.DataFrame, name: str, conditions: list[dict], seeds: list[int]
) -> str:
"""
Summarize manifest files into a table.
Summary table is formatted as:
.. code-block:: bash
┍━━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━┯━━━━━┯━━━━━━━━━━━━━┑
│ │ extension.a │ ... │ extension.n │
┝━━━━━━━━━━━━━━━━━┿━━━━━━━━━━━━━━━┿━━━━━┿━━━━━━━━━━━━━┥
│ condition_key_a │ #/# (##.## %) │ ... │ ✓ │
│ condition_key_b │ #/# (##.## %) │ ... │ ✓ │
│ ... │... │ ... │ ... │
│ condition_key_n │ #/# (##.## %) │ ... │ ✓ │
┕━━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━┷━━━━━┷━━━━━━━━━━━━━┙
For file extensions that have files for each random seed, the summary table
reports the number and percentage of random seeds. For file extensions with
only one file per condition, a checkmark (✓) is used to indicate if the
file exists or not.
Parameters
----------
manifest
Manifest of file keys, extensions, and locations.
name
Name of the simulation series.
conditions
List of series condition dictionaries (must include unique condition
"key").
seeds
List of series random seeds.
Returns
-------
:
Manifest summary table.
"""

condition_keys = [f"{name}_{condition['key']}" for condition in conditions]
manifest_keys = manifest.set_index("KEY").filter(regex=f"^{name}", axis="index").reset_index()
extensions = manifest_keys["EXTENSION"].unique()
Expand Down
30 changes: 30 additions & 0 deletions src/container_collection/manifest/update_manifest_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@


def update_manifest_contents(location_keys: dict) -> pd.DataFrame:
"""
Update manifest using files at given keys at specified locations.
Parameters
----------
location_keys
Map of locations to list of file keys.
Returns
-------
:
Combined manifest of file keys, extensions, and locations.
"""

all_manifests = []

for location, keys in location_keys.items():
Expand All @@ -21,6 +35,22 @@ def update_manifest_contents(location_keys: dict) -> pd.DataFrame:


def make_file_manifest(location: str, keys: list[str]) -> pd.DataFrame:
"""
Create manifest for location with given list of file keys.
Parameters
----------
location
File location (local path or S3 bucket).
keys
List of file keys.
Returns
-------
:
Manifest of file keys, extensions, and locations.
"""

contents = []

for key in keys:
Expand Down
16 changes: 16 additions & 0 deletions src/container_collection/template/generate_input_contents.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,22 @@


def generate_input_contents(template: str, conditions: list[dict]) -> list[str]:
    """
    Render a template once for each condition.

    The template is compiled a single time with ``StrictUndefined`` and then
    rendered with each condition dictionary as the template context.
    NOTE(review): ``Template`` and ``StrictUndefined`` are presumably imported
    from jinja2, in which case a template variable missing from a condition
    raises an error rather than rendering as empty -- confirm against the
    module imports.

    Parameters
    ----------
    template
        Template string.
    conditions
        List of conditions.

    Returns
    -------
    :
        List of rendered templates, in the same order as the conditions.
    """

    renderer = Template(template, undefined=StrictUndefined)
    return [renderer.render(entry) for entry in conditions]
6 changes: 0 additions & 6 deletions tests/container_collection/test_main.py

This file was deleted.

0 comments on commit 05c2b5a

Please sign in to comment.