From 5fa255dee2a9915281fdb8ce89dc3e7abd9d6902 Mon Sep 17 00:00:00 2001 From: Parsa Date: Sun, 24 Oct 2021 23:56:05 -0700 Subject: [PATCH 01/14] WEBWORK-16 + WEBWORK-18: Added comments to code, added progress bar to code output in terminal, fixed a few minor bugs --- src/problem_bank_scripts/webwork_to_md.py | 49 +++++++++++++++++------ 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index 436b67d8..e698ef61 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -5,6 +5,7 @@ import re import time from shutil import copy2 +import sys # loop through every file in the dir root_path = '../../webwork-open-problem-library/Contrib/BrockPhysics/College_Physics_Urone/' @@ -35,7 +36,6 @@ partial_answer_src = "showPartialCorrectAnswers" # extract file structure from source directory (handles ALL sub-directories) -# for loop runs based # of folders in src for root, dirs, files in os.walk(root_path): for name in dirs: dest_folder = os.path.join(root, name).removeprefix(root_path) @@ -361,6 +361,18 @@ def extract_problem_solution(problem_solution): return question_solution +# a dynamic progress bar source: https://gist.github.com/vladignatyev/06860ec2040cb497f0f3 +def progress(count, total, status=''): + bar_len = 60 + filled_len = int(round(bar_len * count / float(total))) + + percents = round(100.0 * count / float(total), 1) + bar = '=' * filled_len + '-' * (bar_len - filled_len) + + sys.stdout.write('[%s] %s%s -- %s\r' % (bar, percents, '%', status)) + sys.stdout.flush() + + # for loop runs based # of folders in src for root, dirs, files in os.walk(root_path): # create dest file structure based on source directory @@ -374,16 +386,13 @@ def extract_problem_solution(problem_solution): for source_filepath in source_files: try: - dest_file_path = source_filepath[78:source_filepath.rfind('/')] + # start timer for processing file + 
file_start_time = time.process_time() + # extract and build information directory + dest_file_path = source_filepath.split('/')[-2] filename = source_filepath[source_filepath.rfind('/')+1:-3] folder_dir = source_filepath[:source_filepath.rfind('/')] - file_start_time = time.process_time() file_dir = source_filepath[source_filepath.find("Contrib"):] - question_file = open(source_filepath, 'r') - file_contents = question_file.read() - - file_contents_dic = split_file(file_contents) - metadata_dic = metadata_extract(file_contents_dic['metadata']) dir_info = { 'filename': filename, 'file_dir': file_dir, @@ -391,24 +400,39 @@ def extract_problem_solution(problem_solution): 'root_dest_folder': root_dest_folder, 'dest_file_path': dest_file_path } + # each question has a its own unique folder named after the fiile itself i.e question file NU_123.md is within NU_123 folder destination_file_path = root_dest_folder + dest_file_path + "/" + filename + "/" + Path(destination_file_path).mkdir(parents=True, exist_ok=True) + # open and read question file + question_file = open(source_filepath, 'r') + file_contents = question_file.read() + # split content of the question file into sections + file_contents_dic = split_file(file_contents) + # extract metadata from the question file + metadata_dic = metadata_extract(file_contents_dic['metadata']) + # extract question body from the question file question_body = file_contents_dic['question_body'] + # extract question images from the question body image_dic = image_extract(question_body) + # extract question item such as text, part #s, units from the question body question_extract = problem_extract(question_body, image_dic['image_alt_text']) question_text = question_extract['question_text'] question_parts = question_extract['question_parts'] question_units = question_extract['question_units'] + # determine question type question_formats = extract_problem_type(file_contents, dir_info['filename'])['question_type'] + # extract question 
solution from the question content question_solution = extract_problem_solution(file_contents_dic['question_solution']) - Path(destination_file_path).mkdir(parents=True, exist_ok=True) + # send all dictionaries to yaml_dump to create yaml files yaml_dump(dir_info, metadata_dic, question_formats, image_dic, question_text, question_units, question_parts, question_solution, destination_file_path) + # end timer for processing file end_file_time = time.process_time() + # calculate total time for processing file file_process_time = end_file_time - file_start_time - counterString = '#' + str(counter + 1) + ' - [' + str(round(file_process_time, 5)) + '] ' - currentFile = root_dest_folder + dest_file_path + "/" + filename + # print/update progress bar counter += 1 - print(counterString + currentFile) + progress(counter, len(source_files), status="Files Processed: " + str(counter) + "/" + str(len(source_files))) except Exception as e: print(e) @@ -417,5 +441,6 @@ def extract_problem_solution(problem_solution): # ------------------------ STATS ------------------------ # total_end_time = time.process_time() process_time_seconds = total_end_time - total_start_time +print('\n---') print('total time:', round(process_time_seconds / 60, 2), 'minutes,', round(process_time_seconds, 2), 'seconds') print('avg time per each file:', round(process_time_seconds / counter, 2), 'seconds [', counter, '] files') From 71bdc9137c3d616021a67ef19e9d68f7e016d9c4 Mon Sep 17 00:00:00 2001 From: Parsa Date: Fri, 29 Oct 2021 11:18:02 -0700 Subject: [PATCH 02/14] WEBWORK-16: Added description, param and return for each function of the script --- src/problem_bank_scripts/webwork_to_md.py | 102 +++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index e698ef61..7a87e739 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -43,6 +43,11 
@@ def split_file(file_content): + """ + description: splits the file into sections based on the keywords + @param file_content: + @return: dictionary of lists that contain problem parts + """ # TODO: once all functions are completed, convert global variables above into local variables # split the file into bite-size pieces to increase speed and reduce bugs metadata_content = file_content[:file_content.find(metadata_end_src)] @@ -62,6 +67,11 @@ def split_file(file_content): def metadata_extract(metadata_content): + """ + description: extracts metadata variables from the metadata section of the file + @param metadata_content: + @return: dictionary of metadata + """ metadata = "## " chapter_src = "DBchapter" section_src = "DBsection" @@ -97,6 +107,12 @@ def metadata_extract(metadata_content): def determine_problem_type(question_ans, filename): + """ + description: determines the type of problem + @param question_ans: + @param filename: + @return: dictionary of problem type + """ # determine what type of question is based on the ANS(type) numerical_type = "num_cmp" functional_type = "fun_cmp" @@ -133,6 +149,11 @@ def determine_problem_type(question_ans, filename): def server(question_solution): + """ + description: function bundles up the problem's solution in python code + @param question_solution: + @return: dictionary of server containing various elements such as import, generate and prepare. 
+ """ # server variables server_imports = """ import random @@ -158,7 +179,21 @@ def server(question_solution): 'parse': server_parse, 'grade': server_grade} + def yaml_dump(directory_info, metadata, question_format, image_dic, question_text, question_units, question_parts, question_solution, destination_file_path): + """ + description: all problem sections are bundled up and dumped into a markdown file (created) + @param directory_info: + @param metadata: + @param question_format: + @param image_dic: + @param question_text: + @param question_units: + @param question_parts: + @param question_solution: + @param destination_file_path: + @return: none + """ # This solution is copied from this SO answer: https://stackoverflow.com/a/45004775/2217577 yaml.SafeDumper.org_represent_str = yaml.SafeDumper.represent_str @@ -222,6 +257,11 @@ def repr_str(dumper, data): def get_part_type(part_type): + """ + description: determines the type of each question part + @param part_type: + @return: dictionary containing type of question i.e numerical, text, etc. 
+ """ return {"type": part_type, "pl-customizations": {"weight": "1", @@ -230,6 +270,11 @@ def get_part_type(part_type): def image_extract(question_content): + """ + description: extracts images from question content + @param question_content: + @return: dictionary containing image name and image alt text and image line containing both + """ image_src = "image(" image_line = [] image_alt_text = [] @@ -256,6 +301,12 @@ def image_extract(question_content): def problem_extract(question_body, image_alt_text): + """ + description: extracts the question text, parts and units from the question body + @param question_body: + @param image_alt_text: + @return: dictionary containing question text, parts and units + """ hint = '' question_units = '' question_raw = [] @@ -295,17 +346,34 @@ def problem_extract(question_body, image_alt_text): def append_part_counter(part_counter, part_headers): + """ + description: outputs the unique question parts + @param part_counter: + @param part_headers: + @return: unique part counter + """ if part_counter not in part_headers: part_headers.append(part_counter) return part_headers def extract_problem_type(problem_subsection, filename): + """ + description: extracts the problem solution type from each problem subsection + @param problem_subsection: + @param filename: + @return: extract problem format and then call the determine_problem_type function + """ question_format_raw = re.findall("(ANS\(.+?\);)", str(problem_subsection)) return determine_problem_type(question_format_raw, filename) def help_problem_extract_ans_units(problem_subsection): + """ + description: extracts the final answer units and each section of the question + @param problem_subsection: + @return: dictionary containing question sections and final answer units + """ final_ans_units = '' section_clean = '' if not problem_subsection.startswith("\\{ image") and not problem_subsection.endswith(") \\}"): @@ -320,6 +388,11 @@ def 
help_problem_extract_ans_units(problem_subsection): def help_problem_extract_ans_type(problem_subsection): + """ + description: extracts the question's answer type and returns the problem text without the answer type + @param problem_subsection: + @return: return dictionary containing question answer type and problem text without answer type + """ ans_type = [] problem_ans_type_removed = [] if problem_subsection.startswith("END_TEXT"): @@ -333,6 +406,12 @@ def help_problem_extract_ans_type(problem_subsection): def help_problem_extract_append(problem_subsection, final_dic): + """ + description: extracts the question text clean of any PEAL syntax and appends it to the final dictionary + @param problem_subsection: + @param final_dic: + @return: list that contains clean problems without any PEARL syntax in them + """ if len(problem_subsection) > 1: problem_stripped = problem_subsection.replace('\\', '').replace('textrm', '').replace('{', '').replace('}', '')\ .replace('·', '$\\cdot$').replace('END_TEXT', '').replace('BEGIN_TEXT', '').strip() @@ -346,6 +425,11 @@ def help_problem_extract_append(problem_subsection, final_dic): def extract_problem_solution(problem_solution): + """ + description: extracts the problem solution from the problem solution subsection + @param problem_solution: + @return: list containing problem solution + """ question_solution = [] for solution in problem_solution: @@ -361,8 +445,14 @@ def extract_problem_solution(problem_solution): return question_solution -# a dynamic progress bar source: https://gist.github.com/vladignatyev/06860ec2040cb497f0f3 + def progress(count, total, status=''): + """ + description: prints a dynamic progress bar source: https://gist.github.com/vladignatyev/06860ec2040cb497f0f3 + @param count: + @param total: + @param status: + """ bar_len = 60 filled_len = int(round(bar_len * count / float(total))) @@ -393,6 +483,15 @@ def progress(count, total, status=''): filename = source_filepath[source_filepath.rfind('/')+1:-3] 
folder_dir = source_filepath[:source_filepath.rfind('/')] file_dir = source_filepath[source_filepath.find("Contrib"):] + """ + Example of dir_info output + { + 'filename': 'NU_U17-33-02-002', + 'file_dir': 'Contrib/BrockPhysics/College_Physics_Urone/33.Particle_Physics/33-02.Four_Basic_Forces/NU_U17-33-02-002.pg', + 'folder_dir': '../../../webwork-open-problem-library/Contrib/BrockPhysics/College_Physics_Urone/33.Particle_Physics/33-02.Four_Basic_Forces', + 'dest_file_path': '33-02.Four_Basic_Forces', + 'root_dest_folder': 'source/College_Physics_Urone/'} + """ dir_info = { 'filename': filename, 'file_dir': file_dir, @@ -433,7 +532,6 @@ def progress(count, total, status=''): # print/update progress bar counter += 1 progress(counter, len(source_files), status="Files Processed: " + str(counter) + "/" + str(len(source_files))) - except Exception as e: print(e) pass From 7ff1ef1b40c3c7c8a2c3e1c25b376133ebb52275 Mon Sep 17 00:00:00 2001 From: Firas Moosvi Date: Thu, 23 Jun 2022 22:50:39 -0700 Subject: [PATCH 03/14] revert the arguments --- src/problem_bank_scripts/webwork_to_md.py | 33 ++++++++++++++++++----- 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index 7a87e739..628c2082 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -1,3 +1,14 @@ +""" +@Author: Parsa Rajabi (@parsa-rajabi) +@Created: 2021 +@Description: Converts webwork files from .PL to markdown .MD + +Usage: + webwork_to_md.py + +Arguments: + source_path Root of all the pl source files. 
+""" import os from pathlib import Path from pprint import pprint @@ -6,10 +17,20 @@ import time from shutil import copy2 import sys +from docopt import docopt + +# read passed in arguments +args = docopt(__doc__) +# set source_path with passed in path +source_path = args[''] +# OLD source_path = '../../../webwork-open-problem-library/Contrib/BrockPhysics/College_Physics_Urone/' +# check if source_path ends with a backslash, if not, raise an error (this is needed to create the correct dest_folder) +if source_path and source_path[-1] != '/': + raise Exception('Please make sure your source_path argument ends with a backslash i.e. /') -# loop through every file in the dir -root_path = '../../webwork-open-problem-library/Contrib/BrockPhysics/College_Physics_Urone/' -root_dest_folder = '../../instructor_physics_bank/source_ww/output_md/College_Physics_Urone/' +root_dest_folder = 'source/' + source_path.split('/')[-2] + '/' +# Creates root_dest_folder if it doesn't exist +Path(root_dest_folder).mkdir(parents=True, exist_ok=True) # variable declaration counter = 0 @@ -36,9 +57,9 @@ partial_answer_src = "showPartialCorrectAnswers" # extract file structure from source directory (handles ALL sub-directories) -for root, dirs, files in os.walk(root_path): +for root, dirs, files in os.walk(source_path): for name in dirs: - dest_folder = os.path.join(root, name).removeprefix(root_path) + dest_folder = os.path.join(root, name).removeprefix(source_path) src_dirs.append(root_dest_folder + dest_folder) @@ -464,7 +485,7 @@ def progress(count, total, status=''): # for loop runs based # of folders in src -for root, dirs, files in os.walk(root_path): +for root, dirs, files in os.walk(source_path): # create dest file structure based on source directory for dir_path in src_dirs: Path(dir_path).mkdir(parents=True, exist_ok=True) From 201b86dc1c78535d2354ad5fd5eb51b409c6c6b3 Mon Sep 17 00:00:00 2001 From: Parsa Date: Mon, 8 Nov 2021 21:36:24 -0800 Subject: [PATCH 04/14] WEBWORK-22: Fixed 
double directory bug, fixed typo in argument declaration --- src/problem_bank_scripts/webwork_to_md.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index 628c2082..2a44259e 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -22,7 +22,7 @@ # read passed in arguments args = docopt(__doc__) # set source_path with passed in path -source_path = args[''] +source_path = args[''] # OLD source_path = '../../../webwork-open-problem-library/Contrib/BrockPhysics/College_Physics_Urone/' # check if source_path ends with a backslash, if not, raise an error (this is needed to create the correct dest_folder) if source_path and source_path[-1] != '/': @@ -486,9 +486,6 @@ def progress(count, total, status=''): # for loop runs based # of folders in src for root, dirs, files in os.walk(source_path): - # create dest file structure based on source directory - for dir_path in src_dirs: - Path(dir_path).mkdir(parents=True, exist_ok=True) # iterate through each file for file in files: if file.endswith('.pg'): @@ -500,7 +497,7 @@ def progress(count, total, status=''): # start timer for processing file file_start_time = time.process_time() # extract and build information directory - dest_file_path = source_filepath.split('/')[-2] + dest_file_path = source_filepath[:source_filepath.rfind('/')].removeprefix(source_path) filename = source_filepath[source_filepath.rfind('/')+1:-3] folder_dir = source_filepath[:source_filepath.rfind('/')] file_dir = source_filepath[source_filepath.find("Contrib"):] @@ -562,4 +559,4 @@ def progress(count, total, status=''): process_time_seconds = total_end_time - total_start_time print('\n---') print('total time:', round(process_time_seconds / 60, 2), 'minutes,', round(process_time_seconds, 2), 'seconds') -print('avg time per each file:', round(process_time_seconds / counter, 2), 'seconds [', 
counter, '] files') +print('avg time per each file:', round(process_time_seconds / counter, 2), 'seconds [', counter, '] files') \ No newline at end of file From 5979208ac62abaf1782f1857dc06c12707828f46 Mon Sep 17 00:00:00 2001 From: Parsa Date: Mon, 8 Nov 2021 21:41:23 -0800 Subject: [PATCH 05/14] WEBWORK-18: Added logging to script with INFO and ERRORs --- src/problem_bank_scripts/webwork_to_md.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index 2a44259e..081b3fcf 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -17,8 +17,12 @@ import time from shutil import copy2 import sys +import logging from docopt import docopt +logging.basicConfig(filename='Webwork_to_md_logs.log', level=logging.INFO) +logging.info('Started Session') + # read passed in arguments args = docopt(__doc__) # set source_path with passed in path @@ -274,7 +278,7 @@ def repr_str(dumper, data): + '## Rubric \n\n\n' + '## Solution \n\n\n' + '## Comments \n\n\n') - # + ''.join(f'{value}' for key, value in section.items()) + # + ''.join(f'{value}' for key, value in section.items()) def get_part_type(part_type): @@ -285,8 +289,8 @@ def get_part_type(part_type): """ return {"type": part_type, "pl-customizations": - {"weight": "1", - "hide-answer-panel": "true"} + {"weight": "1", + "hide-answer-panel": "true"} } @@ -434,7 +438,7 @@ def help_problem_extract_append(problem_subsection, final_dic): @return: list that contains clean problems without any PEARL syntax in them """ if len(problem_subsection) > 1: - problem_stripped = problem_subsection.replace('\\', '').replace('textrm', '').replace('{', '').replace('}', '')\ + problem_stripped = problem_subsection.replace('\\', '').replace('textrm', '').replace('{', '').replace('}', '') \ .replace('·', '$\\cdot$').replace('END_TEXT', '').replace('BEGIN_TEXT', '').strip() if 
re.match(r'.\) ', problem_stripped): subsection_without_part_num = problem_stripped[3:] @@ -507,8 +511,9 @@ def progress(count, total, status=''): 'filename': 'NU_U17-33-02-002', 'file_dir': 'Contrib/BrockPhysics/College_Physics_Urone/33.Particle_Physics/33-02.Four_Basic_Forces/NU_U17-33-02-002.pg', 'folder_dir': '../../../webwork-open-problem-library/Contrib/BrockPhysics/College_Physics_Urone/33.Particle_Physics/33-02.Four_Basic_Forces', - 'dest_file_path': '33-02.Four_Basic_Forces', - 'root_dest_folder': 'source/College_Physics_Urone/'} + 'root_dest_folder': 'source/College_Physics_Urone/', + 'dest_file_path': '33.Particle_Physics/33-02.Four_Basic_Forces' + } """ dir_info = { 'filename': filename, @@ -552,6 +557,7 @@ def progress(count, total, status=''): progress(counter, len(source_files), status="Files Processed: " + str(counter) + "/" + str(len(source_files))) except Exception as e: print(e) + logging.error('Error: ' + str(e)) pass # ------------------------ STATS ------------------------ # @@ -559,4 +565,5 @@ def progress(count, total, status=''): process_time_seconds = total_end_time - total_start_time print('\n---') print('total time:', round(process_time_seconds / 60, 2), 'minutes,', round(process_time_seconds, 2), 'seconds') -print('avg time per each file:', round(process_time_seconds / counter, 2), 'seconds [', counter, '] files') \ No newline at end of file +print('avg time per each file:', round(process_time_seconds / counter, 2), 'seconds [', counter, '] files') +logging.info('Session Completed') From 10f354b4b8d5f29242a283508c7c1c92cac41014 Mon Sep 17 00:00:00 2001 From: Parsa Date: Mon, 8 Nov 2021 22:15:07 -0800 Subject: [PATCH 06/14] WEBWORK-16 + 17: Added destination arg to script, refactored and simplified sections, created new function to sanitize file_path inputs, removed unused variables, added code comments --- src/problem_bank_scripts/webwork_to_md.py | 57 ++++++++++++++--------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git 
a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index 081b3fcf..87bec1b2 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -4,10 +4,11 @@ @Description: Converts webwork files from .PL to markdown .MD Usage: - webwork_to_md.py + webwork_to_md.py Arguments: - source_path Root of all the pl source files. + source_path Path to root of all the pl source files. + destination_path Path to destination of all md output files. """ import os from pathlib import Path @@ -27,24 +28,15 @@ args = docopt(__doc__) # set source_path with passed in path source_path = args[''] -# OLD source_path = '../../../webwork-open-problem-library/Contrib/BrockPhysics/College_Physics_Urone/' -# check if source_path ends with a backslash, if not, raise an error (this is needed to create the correct dest_folder) -if source_path and source_path[-1] != '/': - raise Exception('Please make sure your source_path argument ends with a backslash i.e. 
/') - -root_dest_folder = 'source/' + source_path.split('/')[-2] + '/' -# Creates root_dest_folder if it doesn't exist -Path(root_dest_folder).mkdir(parents=True, exist_ok=True) +destination_path = args[''] # variable declaration counter = 0 source_files = [] -src_dirs = [] title = topic = author = editor = date = source = template_version = problem_type = attribution = outcomes = difficulty = randomization = taxonomy = "" tags = assets = altText = image_line = [] total_start_time = time.process_time() - # Variable declaration for Webwork keywords metadata_end_src = "DOCUMENT();" marcos_end_src = "TEXT(beginproblem());" @@ -60,11 +52,18 @@ context_src = "Context" partial_answer_src = "showPartialCorrectAnswers" -# extract file structure from source directory (handles ALL sub-directories) -for root, dirs, files in os.walk(source_path): - for name in dirs: - dest_folder = os.path.join(root, name).removeprefix(source_path) - src_dirs.append(root_dest_folder + dest_folder) + +def sanitize_file_path(file_path): + """ + description: sanitizes the file path to ensure it has a trailing slash at the end + @param file_path: + @return: file_path with trailing backslash + """ + # check if file_path doesn't end with a backslash + if file_path and file_path[-1] != '/': + # Add backslash to end of file_path + file_path += "/" + return file_path def split_file(file_content): @@ -278,7 +277,7 @@ def repr_str(dumper, data): + '## Rubric \n\n\n' + '## Solution \n\n\n' + '## Comments \n\n\n') - # + ''.join(f'{value}' for key, value in section.items()) + # + ''.join(f'{value}' for key, value in section.items()) def get_part_type(part_type): @@ -438,7 +437,7 @@ def help_problem_extract_append(problem_subsection, final_dic): @return: list that contains clean problems without any PEARL syntax in them """ if len(problem_subsection) > 1: - problem_stripped = problem_subsection.replace('\\', '').replace('textrm', '').replace('{', '').replace('}', '') \ + problem_stripped = 
problem_subsection.replace('\\', '').replace('textrm', '').replace('{', '').replace('}', '')\ .replace('·', '$\\cdot$').replace('END_TEXT', '').replace('BEGIN_TEXT', '').strip() if re.match(r'.\) ', problem_stripped): subsection_without_part_num = problem_stripped[3:] @@ -487,15 +486,31 @@ def progress(count, total, status=''): sys.stdout.write('[%s] %s%s -- %s\r' % (bar, percents, '%', status)) sys.stdout.flush() +# -------------------------------------------------------------------------------------------------------------------- # + + +# sanitize source path to ensure it has a trailing backslash +source_path = sanitize_file_path(source_path) +# set root destination folder +root_dest_folder = sanitize_file_path(destination_path) + 'source/' + source_path.split('/')[-2] + '/' +# Create root_dest_folder if it doesn't exist +Path(root_dest_folder).mkdir(parents=True, exist_ok=True) -# for loop runs based # of folders in src +# iterate through all the files and dirs in the source directory for root, dirs, files in os.walk(source_path): + # iterate through all the files in the current directory + for name in dirs: + dest_folder = os.path.join(root, name).removeprefix(source_path) + # create dest file structure based on source directory + Path(root_dest_folder + dest_folder).mkdir(parents=True, exist_ok=True) # iterate through each file for file in files: + # if file is a .pg file (PEAL) if file.endswith('.pg'): + # add file path to source_files list source_files.append(os.path.join(root, file)) - +# iterate through every .pg file found in the source directory for source_filepath in source_files: try: # start timer for processing file From 451cf8a85305de790acb8f7c556c2d8c3c133b16 Mon Sep 17 00:00:00 2001 From: Parsa Rajabi Date: Mon, 20 Dec 2021 20:08:52 -0800 Subject: [PATCH 07/14] WEBWORK-24: Fixed final answer units printing with brackets, unified spacing around question body and added comments --- src/problem_bank_scripts/webwork_to_md.py | 10 +++++++--- 1 file 
changed, 7 insertions(+), 3 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index 87bec1b2..455edb9e 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -267,11 +267,15 @@ def repr_str(dumper, data): Path(destination_file_path + directory_info['filename'] + ".md").write_text('---\n' + yaml.safe_dump(yaml_dict, sort_keys=False) + '---\n\n' - + '# {{ params.vars.title }} \n\n' + + '# {{ params.vars.title }} \n' + # Question image + ''.join(f'{image}\n' for image in question_images) + # Question body + ''.join(f'\n{question}\n' for part, question in zip(question_parts, question_text) if (part == 0)) - + ''.join(f'\n## Part {part} \n{question} \n\n\n ### Answer Section\n' for part, question in zip(question_parts, question_text) if (part > 0)) - + str(question_units) + '\n\n' + # Question part # (if question is multi-part + + ''.join(f'\n## Part {part} \n{question} \n\n### Answer Section\n' for part, question in zip(question_parts, question_text) if (part > 0)) + # Final answer units + + ''.join(f'{final_answer_unit}' for final_answer_unit in question_units) + '\n\n' + '## pl-submission-panel \n\n\n' + '## pl-answer-panel \n\n\n' + '## Rubric \n\n\n' From 6c49bbcf06e6caf85b22c6ab86ef84b759fd8187 Mon Sep 17 00:00:00 2001 From: Parsa Rajabi Date: Tue, 21 Dec 2021 23:46:16 -0800 Subject: [PATCH 08/14] WEBWORK-25: Temp fix for keeping track of all final_answer_units --- src/problem_bank_scripts/webwork_to_md.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index 455edb9e..c1fb19a9 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -36,7 +36,7 @@ title = topic = author = editor = date = source = template_version = problem_type = attribution = outcomes = difficulty = randomization = 
taxonomy = "" tags = assets = altText = image_line = [] total_start_time = time.process_time() - +temp_final_answer_units = [] # Variable declaration for Webwork keywords metadata_end_src = "DOCUMENT();" marcos_end_src = "TEXT(beginproblem());" @@ -273,9 +273,9 @@ def repr_str(dumper, data): # Question body + ''.join(f'\n{question}\n' for part, question in zip(question_parts, question_text) if (part == 0)) # Question part # (if question is multi-part - + ''.join(f'\n## Part {part} \n{question} \n\n### Answer Section\n' for part, question in zip(question_parts, question_text) if (part > 0)) + + ''.join(f'\n## Part {part} \n{question}\n' for part, question in zip(question_parts, question_text) if (part > 0)) # Final answer units - + ''.join(f'{final_answer_unit}' for final_answer_unit in question_units) + '\n\n' + + ''.join(f'\n### Answer Section\n{final_answer_unit}' for final_answer_unit in question_units if (len(final_answer_unit) > 0)) + '\n\n' + '## pl-submission-panel \n\n\n' + '## pl-answer-panel \n\n\n' + '## Rubric \n\n\n' @@ -405,10 +405,10 @@ def help_problem_extract_ans_units(problem_subsection): final_ans_units = '' section_clean = '' if not problem_subsection.startswith("\\{ image") and not problem_subsection.endswith(") \\}"): - # if section is the end i.e. 
ans_rule (determines the length of the answer) - if problem_subsection.startswith("\\{ans_rule") and problem_subsection.endswith("\\)"): - # extract the question units using regex - final_ans_units = re.findall('textrm{(.+?)}', problem_subsection) + # extract the question units using regex + final_ans_units = re.findall('\\\\} \\\\\(\\\\textrm{(.+?)}', problem_subsection) + if len(final_ans_units) == 1: + temp_final_answer_units.append(temp_final_answer_units) if not problem_subsection.startswith("\\{ans_rule") and not problem_subsection.endswith("\\)"): section_clean = problem_subsection return {'section': section_clean, @@ -574,6 +574,7 @@ def progress(count, total, status=''): # print/update progress bar counter += 1 progress(counter, len(source_files), status="Files Processed: " + str(counter) + "/" + str(len(source_files))) + temp_final_answer_units = [] except Exception as e: print(e) logging.error('Error: ' + str(e)) From c4840c54634c3ccc8410724f945d1e644d6c0f12 Mon Sep 17 00:00:00 2001 From: Parsa Rajabi Date: Tue, 28 Dec 2021 13:34:05 -0800 Subject: [PATCH 09/14] WEBWORK-25: Fixed bug for multi-part questions that had missing final answer units --- src/problem_bank_scripts/webwork_to_md.py | 29 ++++++++++++----------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index c1fb19a9..a3c2d54a 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -36,7 +36,7 @@ title = topic = author = editor = date = source = template_version = problem_type = attribution = outcomes = difficulty = randomization = taxonomy = "" tags = assets = altText = image_line = [] total_start_time = time.process_time() -temp_final_answer_units = [] + # Variable declaration for Webwork keywords metadata_end_src = "DOCUMENT();" marcos_end_src = "TEXT(beginproblem());" @@ -270,13 +270,11 @@ def repr_str(dumper, data): + '# {{ 
params.vars.title }} \n' # Question image + ''.join(f'{image}\n' for image in question_images) - # Question body - + ''.join(f'\n{question}\n' for part, question in zip(question_parts, question_text) if (part == 0)) - # Question part # (if question is multi-part - + ''.join(f'\n## Part {part} \n{question}\n' for part, question in zip(question_parts, question_text) if (part > 0)) - # Final answer units - + ''.join(f'\n### Answer Section\n{final_answer_unit}' for final_answer_unit in question_units if (len(final_answer_unit) > 0)) + '\n\n' - + '## pl-submission-panel \n\n\n' + # Question body w/ final answer units + + ''.join(f'\n{question}\n \n### Answer Section\n{final_answer_unit}\n ' for part, question, final_answer_unit in zip(question_parts, question_text, question_units) if (part == 0)) + # Question part number and question body w/ final answer units (if question is multi-part) + + ''.join(f'\n## Part {part} \n{question}\n \n### Answer Section\n{final_answer_unit}\n' for part, question,final_answer_unit in zip(question_parts, question_text, question_units) if (part > 0)) + + '\n ## pl-submission-panel \n\n\n' + '## pl-answer-panel \n\n\n' + '## Rubric \n\n\n' + '## Solution \n\n\n' @@ -336,11 +334,12 @@ def problem_extract(question_body, image_alt_text): @return: dictionary containing question text, parts and units """ hint = '' - question_units = '' + question_unit = '' question_raw = [] question_split = '' part_headers = [] question_part = [] + multi_part_question_units = [] # split question into sections based on "$PAR" for question in question_body: @@ -359,7 +358,9 @@ def problem_extract(question_body, image_alt_text): if not hint or hint not in section_clean: subsection = help_problem_extract_ans_units(section_clean) subsection_text = subsection['section'] - question_units = subsection['final_ans_units'] + question_unit = subsection['final_ans_units'] + if len(question_unit) > 0: + multi_part_question_units.append(question_unit) subsection_multi_part = 
help_problem_extract_ans_type(subsection_text) subsection_multi_part_ans_type = subsection_multi_part['ans_type'] subsection_clean = subsection_multi_part['problem_clean'] @@ -370,7 +371,7 @@ def problem_extract(question_body, image_alt_text): return {'question_text': question_raw, 'question_parts': question_part, - 'question_units': question_units} + 'question_units': multi_part_question_units} def append_part_counter(part_counter, part_headers): @@ -402,17 +403,17 @@ def help_problem_extract_ans_units(problem_subsection): @param problem_subsection: @return: dictionary containing question sections and final answer units """ - final_ans_units = '' + question_final_units = '' section_clean = '' if not problem_subsection.startswith("\\{ image") and not problem_subsection.endswith(") \\}"): # extract the question units using regex final_ans_units = re.findall('\\\\} \\\\\(\\\\textrm{(.+?)}', problem_subsection) if len(final_ans_units) == 1: - temp_final_answer_units.append(temp_final_answer_units) + question_final_units = ''.join(final_ans_units) if not problem_subsection.startswith("\\{ans_rule") and not problem_subsection.endswith("\\)"): section_clean = problem_subsection return {'section': section_clean, - 'final_ans_units': final_ans_units} + 'final_ans_units': question_final_units} def help_problem_extract_ans_type(problem_subsection): From d4fd1c122be06a284078910c5a3292c6ad53180c Mon Sep 17 00:00:00 2001 From: Parsa Rajabi Date: Tue, 28 Dec 2021 15:58:53 -0800 Subject: [PATCH 10/14] WEBWORK-25: Fixed minor spacing --- src/problem_bank_scripts/webwork_to_md.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index a3c2d54a..b3516ce2 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -274,7 +274,7 @@ def repr_str(dumper, data): + ''.join(f'\n{question}\n \n### Answer Section\n{final_answer_unit}\n ' for 
part, question, final_answer_unit in zip(question_parts, question_text, question_units) if (part == 0)) # Question part number and question body w/ final answer units (if question is multi-part) + ''.join(f'\n## Part {part} \n{question}\n \n### Answer Section\n{final_answer_unit}\n' for part, question,final_answer_unit in zip(question_parts, question_text, question_units) if (part > 0)) - + '\n ## pl-submission-panel \n\n\n' + + '\n## pl-submission-panel \n\n\n' + '## pl-answer-panel \n\n\n' + '## Rubric \n\n\n' + '## Solution \n\n\n' From 874d30b540f5bb5a5776d8641deaa4b47935074c Mon Sep 17 00:00:00 2001 From: Parsa Rajabi Date: Tue, 28 Dec 2021 16:43:43 -0800 Subject: [PATCH 11/14] WEBWORK-25: Removed unused var --- src/problem_bank_scripts/webwork_to_md.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index b3516ce2..6d961d27 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -575,7 +575,6 @@ def progress(count, total, status=''): # print/update progress bar counter += 1 progress(counter, len(source_files), status="Files Processed: " + str(counter) + "/" + str(len(source_files))) - temp_final_answer_units = [] except Exception as e: print(e) logging.error('Error: ' + str(e)) From 0fdbe21ee3fb07e2cf157c84307b060ecfe9f3d5 Mon Sep 17 00:00:00 2001 From: Firas Moosvi Date: Thu, 23 Jun 2022 23:21:32 -0700 Subject: [PATCH 12/14] add try/except to catch counter of 0 --- src/problem_bank_scripts/webwork_to_md.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index 6d961d27..5372a38b 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -2,10 +2,8 @@ @Author: Parsa Rajabi (@parsa-rajabi) @Created: 2021 @Description: Converts webwork files from .PL to markdown .MD - 
Usage: webwork_to_md.py <source_path> <destination_path> - Arguments: source_path Path to root of all the pl source files. destination_path Path to destination of all md output files. @@ -585,5 +583,8 @@ def progress(count, total, status=''): process_time_seconds = total_end_time - total_start_time print('\n---') print('total time:', round(process_time_seconds / 60, 2), 'minutes,', round(process_time_seconds, 2), 'seconds') -print('avg time per each file:', round(process_time_seconds / counter, 2), 'seconds [', counter, '] files') +try: + print('avg time per each file:', round(process_time_seconds / counter, 2), 'seconds [', counter, '] files') +except ZeroDivisionError: + print("Something went wrong, the counter is 0!") logging.info('Session Completed') From 269d0e88c48ed19da70663dd28e49df32ff5a32c Mon Sep 17 00:00:00 2001 From: Firas Moosvi Date: Thu, 23 Jun 2022 23:30:06 -0700 Subject: [PATCH 13/14] update documentation to correct file names --- src/problem_bank_scripts/webwork_to_md.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index 5372a38b..d5f99e02 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -1,11 +1,11 @@ """ @Author: Parsa Rajabi (@parsa-rajabi) @Created: 2021 -@Description: Converts webwork files from .PL to markdown .MD +@Description: Converts webwork files from .PG to markdown .MD Usage: webwork_to_md.py <source_path> <destination_path> Arguments: - source_path Path to root of all the pl source files. + source_path Path to root of all the pg source files. destination_path Path to destination of all md output files.
""" import os From c2c59c06c3b0533e49dfdee4141e4e0d269f7bf3 Mon Sep 17 00:00:00 2001 From: Firas Moosvi Date: Fri, 24 Jun 2022 00:32:14 -0700 Subject: [PATCH 14/14] adjust script so it works again --- src/problem_bank_scripts/webwork_to_md.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/problem_bank_scripts/webwork_to_md.py b/src/problem_bank_scripts/webwork_to_md.py index d5f99e02..8bcebd3d 100644 --- a/src/problem_bank_scripts/webwork_to_md.py +++ b/src/problem_bank_scripts/webwork_to_md.py @@ -23,10 +23,15 @@ logging.info('Started Session') # read passed in arguments -args = docopt(__doc__) +#args = docopt(__doc__) + # set source_path with passed in path -source_path = args['<source_path>'] -destination_path = args['<destination_path>'] +#source_path = args['<source_path>'] +#destination_path = args['<destination_path>'] + +#TODO: uncomment the above lines to reactivate docopt, using this as a temporary workaround +source_path = '../../../webwork-open-problem-library/Contrib/BrockPhysics/College_Physics_Urone/' +destination_path = '../../../instructor_physics_bank/webwork/' # variable declaration counter = 0 @@ -272,8 +277,6 @@ def repr_str(dumper, data): + ''.join(f'\n{question}\n \n### Answer Section\n{final_answer_unit}\n ' for part, question, final_answer_unit in zip(question_parts, question_text, question_units) if (part == 0)) # Question part number and question body w/ final answer units (if question is multi-part) + ''.join(f'\n## Part {part} \n{question}\n \n### Answer Section\n{final_answer_unit}\n' for part, question,final_answer_unit in zip(question_parts, question_text, question_units) if (part > 0)) - + '\n## pl-submission-panel \n\n\n' - + '## pl-answer-panel \n\n\n' + '## Rubric \n\n\n' + '## Solution \n\n\n' + '## Comments \n\n\n') @@ -495,7 +498,10 @@ def progress(count, total, status=''): # sanitize source path to ensure it has a trailing backslash source_path = sanitize_file_path(source_path) # set root destination folder + +#TODO: Fix this so it's more
robust by using pathlib! root_dest_folder = sanitize_file_path(destination_path) + 'source/' + source_path.split('/')[-2] + '/' + # Create root_dest_folder if it doesn't exist Path(root_dest_folder).mkdir(parents=True, exist_ok=True) @@ -540,7 +546,7 @@ def progress(count, total, status=''): 'root_dest_folder': root_dest_folder, 'dest_file_path': dest_file_path } - # each question has a its own unique folder named after the fiile itself i.e question file NU_123.md is within NU_123 folder + # each question has a its own unique folder named after the file itself i.e question file NU_123.md is within NU_123 folder destination_file_path = root_dest_folder + dest_file_path + "/" + filename + "/" Path(destination_file_path).mkdir(parents=True, exist_ok=True) # open and read question file @@ -584,7 +590,7 @@ def progress(count, total, status=''): print('\n---') print('total time:', round(process_time_seconds / 60, 2), 'minutes,', round(process_time_seconds, 2), 'seconds') try: - print('avg time per each file:', round(process_time_seconds / counter, 2), 'seconds [', counter, '] files') + print('avg time per file:', round(process_time_seconds / counter, 2), 'seconds [', counter, '] files') except ZeroDivisionError: print("Something went wrong, the counter is 0!") logging.info('Session Completed')