
Commit

Readds summary reports
catusphan committed Dec 18, 2024
1 parent b7973a7 commit fe5f9b8
Showing 4 changed files with 104 additions and 49 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/release_all.yml
@@ -20,8 +20,6 @@ concurrency:

jobs:
build:
if: "!contains(github.event.head_commit.message, 'AUTO')"

runs-on: ubuntu-22.04

steps:
4 changes: 1 addition & 3 deletions .github/workflows/release_all_external.yml
@@ -21,8 +21,6 @@ concurrency:

jobs:
build:
if: "!contains(github.event.head_commit.message, 'AUTO')"

runs-on: ubuntu-22.04

steps:
@@ -69,7 +67,7 @@ jobs:
run: |
source .venv/bin/activate &&
uv pip list &&
uv run python ./bin/convert_all.py --input_folder=$INPUT_DIR --output_folder=$OUTPUT_DIR --extension=tab --fileter=C
uv run python ./bin/convert_all.py --input_folder=$INPUT_DIR --output_folder=$OUTPUT_DIR --extension=tab --fileter=CCC
- name: Report the results
run: |
6 changes: 6 additions & 0 deletions Makefile
@@ -10,3 +10,9 @@ all:
test:
echo "Run test the venv"
uv run python ./bin/test.py

dict_new_stats:
uv run python ./bin/dict_summary.py --dict_dir=dict --read_only=no

dict_stats:
uv run python ./bin/dict_summary.py --dict_dir=dict --read_only=yes
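
For context, a minimal sketch of how these two targets are run locally (assuming uv is installed and the repository's dict/ layout); they differ only in the --read_only flag:

make dict_new_stats   # regenerate dict/dict_summary.json plus the markdown report and status summary
make dict_stats       # rebuild the markdown report from the existing dict_summary.json only
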
141 changes: 97 additions & 44 deletions bin/dict_summary.py
@@ -1,6 +1,7 @@
"""Generates summary of available dictionaries"""

import argparse
import glob
import json
import os

@@ -88,11 +89,13 @@ def get_downloadable_files(filebase, tag_download, dict_dir):
return download_links


def generate_summary(dict_dir):
def generate_summary(dict_dir, output_dir):
"""Generate a list of dictionaries containing metadata for each .dfo file."""
print(f"Generating summary data for {dict_dir}")

data = []
needed_files = []
num_dict_found = 0

for filename in os.listdir(dict_dir):
if filename.endswith(".dfo"):
@@ -133,17 +136,60 @@ def generate_summary(dict_dir):
}
)

num_dict_found += 1

# Save the list of dictionaries as a JSON file
json_path = os.path.join(dict_dir, "dict_summary.json")
with open(json_path, 'w', encoding='utf-8') as json_file:
json.dump(data, json_file, ensure_ascii=False, indent=4)
with open(json_path, "w", encoding="utf-8") as json_file:
json.dump(data, json_file, ensure_ascii=False, indent=4)

print(f"Data file writtend to '{json_path}'.")

existing_files = sorted(glob.glob(os.path.join(output_dir, "*.*")))

missing_files = sorted(set(needed_files) - set(existing_files))

print("JSON file 'dict_summary.json' has been generated.")

files_status = "# Status report\n\n"
files_status += "## Counts\n\n"
files_per_format = len(SUPPORTED_EXTENSIONS)
existing_dicts = (len(existing_files) - files_per_format) / files_per_format
missing_dicts = len(missing_files) / files_per_format
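# If output_dir holds fewer files than one full format set, the adjusted count goes
# negative: clamp it to zero and drop one dictionary from the missing count.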
if existing_dicts < 0:
existing_dicts = 0
missing_dicts -= 1

mismatched_dicts = num_dict_found - (existing_dicts + missing_dicts)

assert num_dict_found == len(data)
assert files_per_format * (num_dict_found + 1) == len(needed_files)

print(f"Data file writtend to '{json_path}'.")
files_status += f"- There are **{len(data)}** dict files.\n\n"
files_status += f"- Total NEEDED files: **{len(needed_files)}**\n\n"
files_status += f"- Total EXISTING files: **{len(existing_files)}** "
files_status += f"- or **{existing_dicts:.1f}** dictionaries. "
if len(existing_files) % files_per_format != 0:
files_status += "ABNORMAL NUMBER of files. Some dict has **missing format(s)**. Check missing files list for details.\n\n"
else:
files_status += "The number of files looks NORMAL.\n\n"

files_status += f"- Total MISSING files: {len(missing_files)}** "
files_status += f"(or **{missing_dicts:.1f}** dictionaries which is {'CORRECT' if mismatched_dicts == 0 else 'IN-CORRECT'})\n\n"

files_status_details = "# Errors\n"

files_status_details += f"## Missing files list\n\n"
for item in missing_files:
files_status_details += f"\t{item}\n"

return data
print(files_status_details)
print(files_status)

return data, files_status, files_status_details

def generate_markdown_table(data, extensions, columns):

def generate_markdown_table(data, files_status, files_status_details, extensions, columns):
"""Generate a markdown table from the data."""
print(f"Generating report for {len(data)} dictionaries for {extensions}")

@@ -207,51 +253,58 @@ def generate_markdown_table(data, extensions, columns):

markdown.append(line)

markdown.insert(0, files_status_details)
markdown.insert(0, files_status)

return "\n".join(markdown)


def main():
"""Main function to parse arguments and run the processes.""" # noqa: D202, D401

# Parse command-line arguments
parser = argparse.ArgumentParser(description="Generate a dictionary summary.")
parser.add_argument("--dict_dir", type=str, nargs="?", default="dict", help="The directory containing the dictionary files (default is 'dict').")
parser.add_argument("--outfile", type=str, nargs="?", default="dict_summary.md", help="The output report file name (default is 'dict_summary.md').")
parser.add_argument("--extensions", type=str, nargs="?", default=None, help="The extensions that need included in the report. None means all.")
parser.add_argument("--columns", type=str, nargs="?", default=None, help="The columns that will be kept (Other than the download links).")
parser.add_argument("--read_only", choices=["yes", "no"], default="no", required=False, help="Read data or create it.")

args = parser.parse_args()

print(args)

# Generate the summary data and save it as a JSON file
ext_str = args.extensions
col_str = args.columns
read_only = args.read_only
dict_dir = args.dict_dir
outfile = args.outfile

extensions = list(SUPPORTED_EXTENSIONS.keys()) if not ext_str else [item.strip() for item in ext_str.split(",")]
columns = list(COLUMNS.keys()) if not col_str else [item.strip() for item in col_str.split(",")]

if read_only == "no":
data = generate_summary(dict_dir)
else:
json_path = os.path.join(dict_dir, "dict_summary.json")
with open(json_path, 'r', encoding='utf-8') as json_file:
data = json.load(json_file)
"""Main function to parse arguments and run the processes.""" # noqa: D202, D401

# Parse command-line arguments
parser = argparse.ArgumentParser(description="Generate a dictionary summary.")
parser.add_argument("--dict_dir", type=str, nargs="?", default="dict", help="The directory containing the dictionary files (default is 'dict').")
parser.add_argument("--outfile", type=str, nargs="?", default="dict_summary.md", help="The output report file name (default is 'dict_summary.md').")
parser.add_argument("--output_dir", type=str, nargs="?", default="output", help="The output dir for all the dict results.")
parser.add_argument("--extensions", type=str, nargs="?", default=None, help="The extensions that need included in the report. None means all.")
parser.add_argument("--columns", type=str, nargs="?", default=None, help="The columns that will be kept (Other than the download links).")
parser.add_argument("--read_only", choices=["yes", "no"], default="no", required=False, help="Read data or create it.")

args = parser.parse_args()

print(args)

# Generate the summary data and save it as a JSON file
ext_str = args.extensions
col_str = args.columns
read_only = args.read_only
dict_dir = args.dict_dir
outfile = args.outfile
output_dir = args.output_dir

extensions = list(SUPPORTED_EXTENSIONS.keys()) if not ext_str else [item.strip() for item in ext_str.split(",")]
columns = list(COLUMNS.keys()) if not col_str else [item.strip() for item in col_str.split(",")]

files_status = ""
files_status_details = ""
if read_only == "no":
data, files_status, files_status_details = generate_summary(dict_dir, output_dir)
else:
json_path = os.path.join(dict_dir, "dict_summary.json")
with open(json_path, "r", encoding="utf-8") as json_file:
data = json.load(json_file)

# Generate the markdown table from the JSON data
markdown_table = generate_markdown_table(data, extensions, columns)
# Generate the markdown table from the JSON data
markdown_table = generate_markdown_table(data, files_status, files_status_details, extensions, columns)

# Save the markdown table to a .md file
markdown_file = os.path.join(dict_dir, outfile)
with open(markdown_file, 'w', encoding='utf-8') as file:
file.write(markdown_table)
print(f"Data file written to: {markdown_file}")
# Save the markdown table to a .md file
markdown_file = os.path.join(dict_dir, outfile)
with open(markdown_file, "w", encoding="utf-8") as file:
file.write(markdown_table)
print(f"Data file written to: {markdown_file}")

print(f"Summary markdown file '{outfile}' has been generated.")
print(f"Summary markdown file '{outfile}' has been generated.")


if __name__ == "__main__":
