Skip to content

Commit

Permalink
fix interactive bug
Browse files Browse the repository at this point in the history
  • Loading branch information
mhmohona committed Sep 20, 2024
1 parent 8504672 commit 869684e
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 83 deletions.
121 changes: 54 additions & 67 deletions src/scribe_data/cli/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,8 @@
from scribe_data.cli.cli_utils import language_map
from scribe_data.load.data_to_sqlite import data_to_sqlite
from scribe_data.utils import (
DEFAULT_CSV_EXPORT_DIR,
DEFAULT_JSON_EXPORT_DIR,
DEFAULT_SQLITE_EXPORT_DIR,
DEFAULT_TSV_EXPORT_DIR,
get_language_iso,
)

Expand All @@ -43,6 +41,7 @@ def export_json(
) -> None:
normalized_language = language_map.get(language.lower())
language_capitalized = language.capitalize()

if not normalized_language:
raise ValueError(f"Language '{language_capitalized}' is not recognized.")

Expand All @@ -64,7 +63,6 @@ def export_json(
print(f"Error reading '{data_file}': {e}")
return

# Adjust the output directory for JSON exports.
json_output_dir = (
output_dir
/ DEFAULT_JSON_EXPORT_DIR
Expand All @@ -91,73 +89,62 @@ def export_json(


def convert_to_csv_or_tsv(
    language: str, data_type: list, output_dir: Path, overwrite: bool, output_type: str
) -> None:
    """
    Convert a language's exported JSON data files into CSV or TSV files.

    Parameters
    ----------
        language : str
            Name of the language; looked up case-insensitively in ``language_map``.

        data_type : list
            Data types to convert. Each one is read from
            ``DATA_DIR/<Language>/<data_type>.json``. A single string is also
            accepted and treated as one data type.

        output_dir : Path
            Base directory for the converted files. Each file is written to
            ``output_dir/<Language>/<data_type>.<output_type>``.

        overwrite : bool
            When False, the user is prompted before an existing file is replaced.

        output_type : str
            Either "csv" (comma-delimited) or "tsv" (tab-delimited).

    Returns
    -------
        None
            Progress and problems are reported with ``print``; a data type that
            cannot be converted is skipped and the remaining ones are still processed.
    """
    normalized_language = language_map.get(language.lower())
    if not normalized_language:
        print(f"Language '{language}' is not recognized.")
        return

    # Reject unknown types up front; otherwise anything that is not "csv" would
    # silently be written tab-delimited with an arbitrary file extension.
    delimiters = {"csv": ",", "tsv": "\t"}
    if output_type not in delimiters:
        print(f"Unsupported output type '{output_type}'.")
        return
    delimiter = delimiters[output_type]

    # A bare string would otherwise be iterated character by character.
    if isinstance(data_type, str):
        data_type = [data_type]

    # Loop invariants. Path() also tolerates callers that pass a plain string path.
    language_dir_name = normalized_language["language"].capitalize()
    final_output_dir = Path(output_dir) / language_dir_name

    for dtype in data_type:
        # NOTE(review): DATA_DIR is a module-level name defined outside this
        # excerpt - confirm it points at the JSON export directory.
        file_path = DATA_DIR / language_dir_name / f"{dtype}.json"
        if not file_path.exists():
            print(f"No data found for {dtype} conversion at '{file_path}'.")
            continue

        try:
            # Read as UTF-8 explicitly so the result does not depend on the
            # platform's default encoding (the output is written as UTF-8 too).
            with file_path.open("r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
            print(f"Error reading '{file_path}': {e}")
            continue

        # Check the shape before touching the filesystem so that unsupported
        # data neither leaves an empty file behind nor reports success.
        if not isinstance(data, (dict, list)):
            print(f"Unsupported data format for {output_type} export.")
            continue

        final_output_dir.mkdir(parents=True, exist_ok=True)

        output_file = final_output_dir / f"{dtype}.{output_type}"
        if output_file.exists() and not overwrite:
            user_input = input(
                f"File '{output_file}' already exists. Overwrite? (y/n): "
            )
            if user_input.strip().lower() != "y":
                print(f"Skipping {dtype}")
                continue

        try:
            with output_file.open("w", newline="", encoding="utf-8") as out_file:
                writer = csv.writer(out_file, delimiter=delimiter)
                if isinstance(data, dict):
                    # One header row of keys followed by one row of values.
                    writer.writerow(data.keys())
                    writer.writerow(data.values())
                else:
                    for item in data:
                        if isinstance(item, dict):
                            writer.writerow(item.values())
                        else:
                            writer.writerow([item])
        except OSError as e:
            print(f"Error writing to '{output_file}': {e}")
            continue

        print(f"Data for '{dtype}' written to '{output_file}'")


def convert_to_sqlite(
Expand Down
53 changes: 37 additions & 16 deletions src/scribe_data/cli/interactive.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@
language_metadata,
)
from scribe_data.cli.get import get_data
from scribe_data.utils import DEFAULT_JSON_EXPORT_DIR
from scribe_data.utils import (
DEFAULT_JSON_EXPORT_DIR,
DEFAULT_CSV_EXPORT_DIR,
DEFAULT_TSV_EXPORT_DIR,
)


def get_selection(user_input: str, options: list[str]) -> list[str]:
Expand Down Expand Up @@ -113,15 +117,34 @@ def get_output_options() -> dict:
dict
Output options including type, directory, and overwrite flag
"""
valid_types = ["json", "csv", "tsv"]
output_type = (
input("File type to export (json, csv, tsv) [json]: ").lower() or "json"
input("File type to export (json, csv, tsv) [json]: ").strip().lower() or "json"
)

while output_type not in valid_types:
print(
f"Invalid output type '{output_type}'. Please choose from 'json', 'csv', or 'tsv'."
)
output_type = (
input("File type to export (json, csv, tsv) [json]: ").strip().lower()
or "json"
)

if output_type == "csv":
default_export_dir = DEFAULT_CSV_EXPORT_DIR
elif output_type == "tsv":
default_export_dir = DEFAULT_TSV_EXPORT_DIR
else:
default_export_dir = DEFAULT_JSON_EXPORT_DIR

output_dir = (
input(f"Export directory path [./{DEFAULT_JSON_EXPORT_DIR}]: ")
or f"./{DEFAULT_JSON_EXPORT_DIR}"
input(f"Export directory path [./{default_export_dir}]: ").strip()
or f"./{default_export_dir}"
)
overwrite = (
input("Overwrite existing data without asking (y/n) [n]: ").lower() == "y"
input("Overwrite existing data without asking (y/n) [n]: ").strip().lower()
== "y"
)

return {"type": output_type, "dir": output_dir, "overwrite": overwrite}
Expand Down Expand Up @@ -152,17 +175,15 @@ def run_interactive_mode():
f"Data will be exported as {output_options['type'].upper()} files to '{output_options['dir']}'."
)

# Convert lists to comma-separated strings for get_data.
languages_str = ",".join(selected_languages)
data_types_str = ",".join(selected_data_types)

get_data(
languages_str,
data_types_str,
output_options["dir"],
output_options["overwrite"],
output_options["type"],
)
for language in selected_languages:
for data_type in selected_data_types:
get_data(
language,
data_type,
output_options["dir"],
output_options["overwrite"],
output_options["type"],
)


# This function can be called from main.py when the -i or --interactive flag is used.
Expand Down

0 comments on commit 869684e

Please sign in to comment.