Skip to content

Commit

Permalink
make pylint happy
Browse files Browse the repository at this point in the history
  • Loading branch information
szabgab committed Dec 30, 2021
1 parent 70d5363 commit f9cbb68
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 127 deletions.
21 changes: 21 additions & 0 deletions tools/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,24 @@
* Easily locate where a word is defined either in the source or the target language.
* List all the occurances of a word. (Basically a smart grep)

* do we want to accpet dash (-) in the filenames or make them all use underscores?
```
if re.search(r'[^a-zA-Z0-9_.-]', img):
errors.append(f"Bad character in image: {img}")
```
* do we want to use any images that are not .jpg?
* extract words and sections between {} without the punctuation

## Lili-search features:

* given a path to a course and a string, find all the occurances of that word in the text
* In the Word/Synonyme section, in the translations
* In the Mini-Dictionary or the translations
* Among the Phrases or the translations.

* There might be words that exist in both languages (e.g. David) that will appear both in the target and in the source reports,
* but other than these words should be either in one or the other section.
* TODO: we might want to add a warning if the appear in both (that could be silenced by a flag)
* should find "mujer" in "la mujer" but not "mu" in "la mujer" maybe should indicate it is partial match


3 changes: 1 addition & 2 deletions tools/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def download_course(url, tempdir):
filename = os.path.join(tempdir.name, "course.zip")
if res.status_code == 200:
with open(filename, 'wb') as fh:
res.raw.decode_content
#res.raw.decode_content
shutil.copyfileobj(res.raw, fh)

# unzip
Expand Down Expand Up @@ -92,4 +92,3 @@ def main():


main()

17 changes: 2 additions & 15 deletions tools/lili-search.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,6 @@
import re
from librelingo_yaml_loader.yaml_loader import load_course

# Features:
# given a path to a course and a string, find all the occurances of that word in the text
# In the Word/Synonyme section, in the translations
# In the Mini-Dictionary or the translations
# Among the Phrases or the translations.

# There might be words that exist in both languages (e.g. David) that will appear both in the target and in the source reports,
# but other than these words should be either in one or the other section.
# TODO: we might want to add a warning if the appear in both (that could be silenced by a flag)

# should find "mujer" in "la mujer" but not "mu" in "la mujer" maybe should indicate it is partial match

reports = {}

def collect(key, module, skill, search_word, expressions):
Expand All @@ -29,8 +17,8 @@ def collect(key, module, skill, search_word, expressions):

def main():
if len(sys.argv) != 3:
exit(f"Usage: {sys.argv[0]} COURSE_DIR WORD")
course_path, original_search_word = sys.argv[1:]
sys.exit(f"Usage: {sys.argv[0]} COURSE_DIR WORD")
course_path, original_search_word = sys.argv[1], sys.argv[2]
search_word = original_search_word.lower()
course = load_course(course_path)
for module in course.modules:
Expand Down Expand Up @@ -64,4 +52,3 @@ def main():
for found in reports[key]:
print(found)
main()

137 changes: 72 additions & 65 deletions tools/lili.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import collections
import json
import os
import sys
import re
from jinja2 import Environment, FileSystemLoader

Expand Down Expand Up @@ -41,11 +42,6 @@ def print_ids(ids):

def load_images(images_dir):
for img in os.listdir(images_dir):
# TODO: do we want to accpet dash (-) in the filenames or make them all use underscores?
#if re.search(r'[^a-zA-Z0-9_.-]', img):
# errors.append(f"Bad character in image: {img}")

# TODO: do we want to use any images that are not .jpg?
if not img.endswith('.jpg'):
continue

Expand All @@ -61,7 +57,7 @@ def load_images(images_dir):
if match:
images['regular'].add(match.group(1))
continue
exit(f"Unhandled image: {img}")
sys.exit(f"Unhandled image: {img}")
unused_images["regular"] = set(images["regular"])
return images

Expand All @@ -84,10 +80,10 @@ def check_export(course):
output_path = 'output'
if not os.path.exists(output_path):
os.mkdir(output_path)
try:
export_course(output_path, course, settings)
except Exception as err:
errors.append(f"Exception while exporting to JSON {err}")
export_course(output_path, course, settings)
#try:
#except Exception as err:
# errors.append(f"Exception while exporting to JSON {err}")

def guess_path_to_course(path_to_course):
if not path_to_course:
Expand All @@ -106,7 +102,7 @@ def render(template_file, **args):
return html

def export_main_html_page(course, count, html_dir):
branch = 'main' # TODO how to know?
branch = 'main' # how can we know which is the default branch of a repository?
#'count', 'dictionary', 'index', 'license', 'modules', 'repository_url', 'settings', 'source_language', 'special_characters', 'target_language'
#course.modules[0].skills[0].phrases
#from ptpython.repl import embed
Expand Down Expand Up @@ -143,16 +139,16 @@ def export_json(dictionary, filename, html_dir):
with open(os.path.join(html_dir, filename), 'w') as fh:
json.dump(all_words, fh)

def export_words_html_page(course, all_words, words, dictionary, phrases, count, path, html_file):
def export_words_html_page(course, all_words, language, count, path, html_file):
html = render('words.html',
title = f"{course.target_language.name} for {course.source_language.name} speakers",
rel="",
path=path,
course=course,
all_words=all_words,
words=words,
dictionary=dictionary,
phrases=phrases,
words=language['words'],
dictionary=language['dictionary'],
phrases=language['phrases'],
)
with open(html_file, 'w') as fh:
fh.write(html)
Expand All @@ -175,8 +171,8 @@ def get_repository_url(course):
repository_url = 'https://github.com/kantord/LibreLingo'
return repository_url

def export_word_html_pages(course, all_words, words, dictionary, phrases, words_dir):
branch = 'main' # TODO how to know?
def export_word_html_pages(course, all_words, language, words_dir):
branch = 'main'

repository_url = get_repository_url(course)

Expand All @@ -185,9 +181,9 @@ def export_word_html_pages(course, all_words, words, dictionary, phrases, words_
title = f"{target_word} in {course.target_language.name}",
rel="../",
target_word=target_word,
word_translations=words[target_word],
dictionary_words=dictionary[target_word],
phrases=phrases[target_word],
word_translations=language['words'][target_word],
dictionary_words=language['dictionary'][target_word],
phrases=language['phrases'][target_word],
repository_url=repository_url,
branch=branch,
course=course,
Expand All @@ -196,40 +192,57 @@ def export_word_html_pages(course, all_words, words, dictionary, phrases, words_
with open(os.path.join(words_dir, target_word.lower() + '.html'), 'w') as fh:
fh.write(html)

def export_to_html(course, target_words, target_dictionary, target_phrases, source_words, source_dictionary, source_phrases, count, html_dir):
def export_to_html(course, target, source, count, html_dir):
if not os.path.exists(html_dir):
os.mkdir(html_dir)
for path in ['target', 'source']:
words_dir = os.path.join(html_dir, path)
if not os.path.exists(words_dir):
os.mkdir(words_dir)
all_target_words = set(target_words.keys()).union(set(target_dictionary.keys())).union(set(target_phrases.keys()))
all_target_words = set(target['words'].keys()).union(set(target['dictionary'].keys())).union(set(target['phrases'].keys()))
count['target_words'] = len(all_target_words)

all_source_words = set(source_words.keys()).union(set(source_dictionary.keys())).union(set(source_phrases.keys()))
all_source_words = set(source['words'].keys()).union(set(source['dictionary'].keys())).union(set(source['phrases'].keys()))
count['source_words'] = len(all_source_words)

export_json(source_dictionary, 'source-to-target.json', html_dir)
export_json(target_dictionary, 'target-to-source.json', html_dir)
export_json(source['dictionary'], 'source-to-target.json', html_dir)
export_json(target['dictionary'], 'target-to-source.json', html_dir)

export_main_html_page(course, count, html_dir)
export_words_html_page(course, all_target_words, target_words, target_dictionary, target_phrases, count, 'target', os.path.join(html_dir, 'target.html'))
export_words_html_page(course, all_source_words, source_words, source_dictionary, source_phrases, count, 'source', os.path.join(html_dir, 'source.html'))
export_word_html_pages(course, all_target_words, target_words, target_dictionary, target_phrases, os.path.join(html_dir, 'target'))
export_word_html_pages(course, all_source_words, source_words, source_dictionary, source_phrases, os.path.join(html_dir, 'source'))
export_words_html_page(course, all_target_words, target, count, 'target', os.path.join(html_dir, 'target.html'))
export_words_html_page(course, all_source_words, source, count, 'source', os.path.join(html_dir, 'source.html'))
export_word_html_pages(course, all_target_words, target, os.path.join(html_dir, 'target'))
export_word_html_pages(course, all_source_words, source, os.path.join(html_dir, 'source'))
with open(os.path.join(html_dir, 'stats.json'), 'w') as fh:
json.dump(count, fh)

def clean(text):
return re.sub(r'[{}.!?¡¿",/]', '', text)


def _collect_phrases(skill, count, target, source):
for phrase in skill.phrases:
for sentence in phrase.in_target_language:
count['target_phrases'] += 1
for word in clean(sentence).split(' '):
target['phrases'][word.lower()].append({'sentence': sentence, 'skill': skill})
for sentence in phrase.in_source_language:
count['source_phrases'] += 1
for word in clean(sentence).split(' '):
source['phrases'][word.lower()].append({'sentence': sentence, 'skill': skill})

def generate_html(course, html_dir):
target_words = collections.defaultdict(list)
source_words = collections.defaultdict(list)
target_dictionary = collections.defaultdict(list)
source_dictionary = collections.defaultdict(list)
target_phrases = collections.defaultdict(list)
source_phrases = collections.defaultdict(list)
target = {
'words' : collections.defaultdict(list),
'dictionary': collections.defaultdict(list),
'phrases' : collections.defaultdict(list),
}
source = {
'words' : collections.defaultdict(list),
'dictionary': collections.defaultdict(list),
'phrases' : collections.defaultdict(list),
}

count = {
'target_phrases': 0,
'source_phrases': 0,
Expand All @@ -239,43 +252,21 @@ def generate_html(course, html_dir):
for skill in module.skills:
for word in skill.words:
for txt in word.in_target_language:
target_words[clean(txt).lower()].append({'word': word, 'skill': skill})
target['words'][clean(txt).lower()].append({'word': word, 'skill': skill})
for txt in word.in_source_language:
source_words[clean(txt).lower()].append({'word': word, 'skill': skill})
source['words'][clean(txt).lower()].append({'word': word, 'skill': skill})

for left, right, target_to_source in skill.dictionary:
if target_to_source:
target_dictionary[clean(left).lower()].append({'word': right, 'skill': skill})
target['dictionary'][clean(left).lower()].append({'word': right, 'skill': skill})
else:
source_dictionary[clean(left).lower()].append({'word': right, 'skill': skill})

for phrase in skill.phrases:
for sentence in phrase.in_target_language:
count['target_phrases'] += 1
# TODO extract words and sections between {} without the punctuation
for word in clean(sentence).split(' '):
target_phrases[word.lower()].append({'sentence': sentence, 'skill': skill})
for sentence in phrase.in_source_language:
count['source_phrases'] += 1
# TODO extract words and sections between {} without the punctuation
for word in clean(sentence).split(' '):
source_phrases[word.lower()].append({'sentence': sentence, 'skill': skill})

export_to_html(course, target_words, target_dictionary, target_phrases, source_words, source_dictionary, source_phrases, count, html_dir)

def main():
args = get_args()
source['dictionary'][clean(left).lower()].append({'word': right, 'skill': skill})

path_to_course = guess_path_to_course(args.course)
try:
course = load_course(path_to_course)
except Exception as err:
exit(f"Could not load course {err}")
_collect_phrases(skill, count, target, source)

images = None
if args.images:
images = load_images(args.images)
export_to_html(course, target, source, count, html_dir)

def collect_ids_and_names(args, course):
skill_ids = {}
skill_names = {}
for module in course.modules:
Expand All @@ -294,6 +285,22 @@ def main():
'module': module,
'skill': skill,
}
return skill_ids

def main():
args = get_args()

path_to_course = guess_path_to_course(args.course)
course = load_course(path_to_course)
#try:
#except Exception as err:
# sys.exit(f"Could not load course {err}")

#images = None
#if args.images:
# images = load_images(args.images)

skill_ids = collect_ids_and_names(args, course)

if args.export:
check_export(course)
Expand All @@ -318,7 +325,7 @@ def main():
print('------------------ ERRORS ---------------------')
for err in errors:
print(err)
exit(1)
sys.exit(1)


main()
Loading

0 comments on commit f9cbb68

Please sign in to comment.