diff --git a/cms/djangoapps/contentstore/management/commands/copy_libraries_from_v1_to_v2.py b/cms/djangoapps/contentstore/management/commands/copy_libraries_from_v1_to_v2.py index c0a866115127..3be5cab11c23 100644 --- a/cms/djangoapps/contentstore/management/commands/copy_libraries_from_v1_to_v2.py +++ b/cms/djangoapps/contentstore/management/commands/copy_libraries_from_v1_to_v2.py @@ -1,6 +1,7 @@ """A Command to Copy or uncopy V1 Content Libraries entires to be stored as v2 content libraries.""" import logging +import csv from textwrap import dedent from django.core.management import BaseCommand, CommandError @@ -28,15 +29,13 @@ class Command(BaseCommand): and -- file followed by the path for a list of libraries from a file. Example usage: - $ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid' --all + $ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid' --all --uncopy + $ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid 'library-v1:edX+DemoX+Better_Library' + $ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid 'library-v1:edX+DemoX+Better_Library' --uncopy $ ./manage.py cms copy_libraries_from_v1_to_v2 - library-v1:edX+DemoX+Demo_Library' 'library-v1:edX+DemoX+Better_Library' -c 'collection_uuid' - $ ./manage.py cms copy_libraries_from_v1_to_v2 --all --uncopy - $ ./manage.py cms copy_libraries_from_v1_to_v2 'library-v1:edX+DemoX+Better_Library' --uncopy - $ ./manage.py cms copy_libraries_from_v1_to_v2 - '11111111-2111-4111-8111-111111111111' - './list_of--library-locators- --file + '11111111-2111-4111-8111-111111111111' + './list_of--library-locators.csv --all Note: This Command Also produces an "output file" which contains the mapping of locators and the status of the copy. @@ -49,17 +48,18 @@ def add_arguments(self, parser): """arguements for command""" parser.add_argument( - '-collection_uuid', - '-c', - nargs=1, + 'collection_uuid', type=str, help='the uuid for the collection to create the content library in.' 
) parser.add_argument( - 'library_ids', - nargs='*', - help='a space-seperated list of v1 library ids to copy' + 'output_csv', + type=str, + nargs='?', + default=None, + help='a file path to write the tasks output to. Without this the result is simply logged.' ) + parser.add_argument( '--all', action='store_true', @@ -72,12 +72,11 @@ def add_arguments(self, parser): dest='uncopy', help='Delete libraries specified' ) - parser.add_argument( - 'output_csv', - nargs='?', - default=None, - help='a file path to write the tasks output to. Without this the result is simply logged.' + 'library_ids', + nargs='*', + default=[], + help='a space-seperated list of v1 library ids to copy' ) def _parse_library_key(self, raw_value): @@ -90,10 +89,6 @@ def _parse_library_key(self, raw_value): def handle(self, *args, **options): # lint-amnesty, pylint: disable=unused-argument """Parse args and generate tasks for copying content.""" - print(options) - - if (not options['library_ids'] and not options['all']) or (options['library_ids'] and options['all']): - raise CommandError("copy_libraries_from_v1_to_v2 requires one or more s or the --all flag.") if (not options['library_ids'] and not options['all']) or (options['library_ids'] and options['all']): raise CommandError("copy_libraries_from_v1_to_v2 requires one or more s or the --all flag.") @@ -110,16 +105,17 @@ def handle(self, *args, **options): # lint-amnesty, pylint: disable=unused-argu v1_library_keys = list(map(self._parse_library_key, options['library_ids'])) create_library_task_group = group([ - delete_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid'][0]) + delete_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid']) if options['uncopy'] - else create_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid'][0]) + else create_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid']) for v1_library_key in v1_library_keys ]) group_result = 
create_library_task_group.apply_async().get() if options['output_csv']: - with open(options['output_csv'][0], 'w', encoding='utf-8', newline='') as output_writer: - output_writer.writerow("v1_library_id", "v2_library_id", "status", "error_msg") + with open(options['output_csv'], 'w', encoding='utf-8', newline='') as file: + output_writer = csv.writer(file) + output_writer.writerow(["v1_library_id", "v2_library_id", "status", "error_msg"]) for result in group_result: - output_writer.write(result.keys()) + output_writer.writerow(result.values()) log.info(group_result) diff --git a/cms/djangoapps/contentstore/management/commands/replace_v1_lib_refs_with_v2_in_courses.py b/cms/djangoapps/contentstore/management/commands/replace_v1_lib_refs_with_v2_in_courses.py new file mode 100644 index 000000000000..39d64912bf70 --- /dev/null +++ b/cms/djangoapps/contentstore/management/commands/replace_v1_lib_refs_with_v2_in_courses.py @@ -0,0 +1,125 @@ +""" +A Command which, given a mapping of V1 to V2 Libraries, +edits all xblocks in courses which refer to the v1 library to point to the v2 library. 
+""" + +import logging +import csv + +from django.core.management import BaseCommand, CommandError +from celery import group + +from openedx.core.djangoapps.content.course_overviews.models import CourseOverview +from cms.djangoapps.contentstore.tasks import ( + replace_all_library_source_blocks_ids_for_course, + validate_all_library_source_blocks_ids_for_course, + undo_all_library_source_blocks_ids_for_course +) + +log = logging.getLogger(__name__) + + +class Command(BaseCommand): + """ + Example usage: + $ ./manage.py cms replace_v1_lib_refs_with_v2_in_courses '/path/to/library_mappings.csv' + $ ./manage.py cms replace_v1_lib_refs_with_v2_in_courses '/path/to/library_mappings.csv' --validate + $ ./manage.py cms replace_v1_lib_refs_with_v2_in_courses '/path/to/library_mappings.csv' --undo + """ + def add_arguments(self, parser): + parser.add_argument('file_path', type=str, help='Path to the CSV file.') + parser.add_argument('--validate', action='store_true', help='Validate previous runs of the command') + parser.add_argument('--undo', action='store_true', help='Validate previous runs of the command') + + def replace_all_library_source_blocks_ids(self, v1_to_v2_lib_map): + """A method to replace 'source_library_id' in all relevant blocks.""" + + courses = CourseOverview.get_all_courses() + + # Use Celery to distribute the workload + tasks = group( + replace_all_library_source_blocks_ids_for_course.s( + course, + v1_to_v2_lib_map + ) + for course in courses + ) + results = tasks.apply_async() + + for result in results.get(): + if isinstance(result, Exception): + # Handle the task failure here + log.error("Task failed with error: %s", str(result)) + continue + log.info( + "Completed replacing all v1 library source ids with v2 library source ids" + ) + + def validate(self, v1_to_v2_lib_map): + """ Validate that replace_all_library_source_blocks_ids was successful""" + courses = CourseOverview.get_all_courses() + tasks = 
group(validate_all_library_source_blocks_ids_for_course.s(course, v1_to_v2_lib_map) for course in courses) # lint-amnesty, pylint: disable=line-too-long + results = tasks.apply_async() + + validation = set() + for result in results.get(): + if isinstance(result, Exception): + # Handle the task failure here + log.error("Task failed with error: %s", str(result)) + continue + else: + validation.update(result) + + if validation.issubset(v1_to_v2_lib_map.values()): + log.info("Validation: All values in the input map are present in courses.") + else: + log.info( + "Validation Failed: There are unmapped v1 libraries." + ) + + def undo(self, v1_to_v2_lib_map): + """ undo the changes made by replace_all_library_source_blocks_ids""" + courses = CourseOverview.get_all_courses() + + # Use Celery to distribute the workload + tasks = group(undo_all_library_source_blocks_ids_for_course.s(course, v1_to_v2_lib_map) for course in courses) + results = tasks.apply_async() + + for result in results.get(): + if isinstance(result, Exception): + # Handle the task failure here + log.error("Task failed with error: %s", str(result)) + continue + log.info("Completed replacing all v2 library source ids with v1 library source ids. Undo Complete") + + def handle(self, *args, **kwargs): + """ Parse arguments and begin command""" + file_path = kwargs['file_path'] + v1_to_v2_lib_map = {} + try: + with open(file_path, 'r', encoding='utf-8') as csvfile: + + if not file_path.endswith('.csv'): + raise CommandError('Invalid file format. 
Only CSV files are supported.') + + csv_reader = csv.reader(csvfile) + + for row in csv_reader: + if len(row) >= 2: + key = row[0].strip() + value = row[1].strip() + v1_to_v2_lib_map[key] = value + + print("Data successfully imported as dictionary:") + + except FileNotFoundError: + log.error("File not found at '%s'.", {file_path}) + except Exception as e: # lint-amnesty, pylint: disable=broad-except + log.error("An error occurred: %s", {str(e)}) + + if kwargs['validate']: + self.validate(v1_to_v2_lib_map) + if kwargs['undo']: + self.undo(v1_to_v2_lib_map) + else: + self.replace_all_library_source_blocks_ids(v1_to_v2_lib_map) diff --git a/cms/djangoapps/contentstore/tasks.py b/cms/djangoapps/contentstore/tasks.py index d38ac165ce71..b6861e4ddc5b 100644 --- a/cms/djangoapps/contentstore/tasks.py +++ b/cms/djangoapps/contentstore/tasks.py @@ -868,6 +868,8 @@ def _create_copy_content_task(v2_library_key, v1_library_key): def _create_metadata(v1_library_key, collection_uuid): """instansiate an index for the V2 lib in the collection""" + print(collection_uuid) + store = modulestore() v1_library = store.get_library(v1_library_key) collection = get_collection(collection_uuid).uuid @@ -1000,3 +1002,142 @@ def delete_v1_library(v1_library_key_string): "status": "SUCCESS", "msg": "SUCCESS" } + + +@shared_task(time_limit=30) +@set_code_owner_attribute +def validate_all_library_source_blocks_ids_for_course(course, v1_to_v2_lib_map): + """Search a Modulestore for all library source blocks in a course by querying mongo. 
+ replace all source_library_ids with the corresponding v2 value from the map + """ + store = modulestore() + with store.bulk_operations(course.id): + visited = [] + for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]: + blocks = store.get_items( + course.id.for_branch(branch), + settings={'source_library_id': {'$exists': True}} + ) + for xblock in blocks: + if xblock.source_library_id not in v1_to_v2_lib_map.values(): + # lint-amnesty, pylint: disable=broad-except + raise Exception( + f'{xblock.source_library_id} in {course.id} is not found in mapping. Validation failed' + ) + visited.append(xblock.source_library_id) + # return success + return visited + + +@shared_task(time_limit=30) +@set_code_owner_attribute +def replace_all_library_source_blocks_ids_for_course(course, v1_to_v2_lib_map): # lint-amnesty, pylint: disable=useless-return + """Search a Modulestore for all library source blocks in a course by querying mongo. + replace all source_library_ids with the corresponding v2 value from the map. + + This will trigger a publish on the course for every published library source block. + """ + store = modulestore() + with store.bulk_operations(course.id): + #for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]: + draft_blocks, published_blocks = [ + store.get_items( + course.id.for_branch(branch), + settings={'source_library_id': {'$exists': True}} + ) + for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published] + ] + + published_dict = {block.location: block for block in published_blocks} + + for draft_library_source_block in draft_blocks: + try: + new_source_id = str(v1_to_v2_lib_map[draft_library_source_block.source_library_id]) + except KeyError: + #skip invalid keys + LOGGER.error( + 'Key %s not found in mapping. 
Skipping block for course %s', + str({draft_library_source_block.source_library_id}), + str(course.id) + ) + continue + + # The published branch should be updated as well as the draft branch + # This way, if authors "discard changes," they won't be reverted back to the V1 lib. + # However, we also don't want to publish the draft branch. + try: + if published_dict[draft_library_source_block.location] is not None: + #temporarily set the published version to be the draft & publish it. + temp = published_dict[draft_library_source_block.location] + temp.source_library_id = new_source_id + store.update_item(temp, None) + store.publish(temp.location, None) + draft_library_source_block.source_library_id = new_source_id + store.update_item(draft_library_source_block, None) + except KeyError: + #Warn, but just update the draft block if no published block for draft block. + LOGGER.warning( + 'No matching published block for draft block %s', + str(draft_library_source_block.location) + ) + draft_library_source_block.source_library_id = new_source_id + store.update_item(draft_library_source_block, None) + # return success + return + + +@shared_task(time_limit=30) +@set_code_owner_attribute +def undo_all_library_source_blocks_ids_for_course(course, v1_to_v2_lib_map): # lint-amnesty, pylint: disable=useless-return + """Search a Modulestore for all library source blocks in a course by querying mongo. + replace all source_library_ids with the corresponding v1 value from the inverted map. + This exists to undo changes made previously. 
+ """ + + v2_to_v1_lib_map = {v: k for k, v in v1_to_v2_lib_map.items()} + + store = modulestore() + draft_blocks, published_blocks = [ + store.get_items( + course.id.for_branch(branch), + settings={'source_library_id': {'$exists': True}} + ) + for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published] + ] + + published_dict = {block.location: block for block in published_blocks} + + for draft_library_source_block in draft_blocks: + try: + new_source_id = str(v2_to_v1_lib_map[draft_library_source_block.source_library_id]) + except KeyError: + #skip invalid keys + LOGGER.error( + 'Key %s not found in mapping. Skipping block for course %s', + str({draft_library_source_block.source_library_id}), + str(course.id) + ) + continue + + # The publsihed branch should be updated as well as the draft branch + # This way, if authors "discard changes," they won't be reverted back to the V1 lib. + # However, we also don't want to publish the draft branch. + try: + if published_dict[draft_library_source_block.location] is not None: + #temporarily set the published version to be the draft & publish it. + temp = published_dict[draft_library_source_block.location] + temp.source_library_id = new_source_id + store.update_item(temp, None) + store.publish(temp.location, None) + draft_library_source_block.source_library_id = new_source_id + store.update_item(draft_library_source_block, None) + except KeyError: + #Warn, but just update the draft block if no published block for draft block. 
+ LOGGER.warning( + 'No matching published block for draft block %s', + str(draft_library_source_block.location) + ) + draft_library_source_block.source_library_id = new_source_id + store.update_item(draft_library_source_block, None) + # return success + return diff --git a/xmodule/modulestore/split_mongo/split.py b/xmodule/modulestore/split_mongo/split.py index e2f73759f0a8..c6e4c7889adf 100644 --- a/xmodule/modulestore/split_mongo/split.py +++ b/xmodule/modulestore/split_mongo/split.py @@ -1951,6 +1951,7 @@ def update_item(self, block, user_id, allow_not_found=False, force=False, **kwar The implementation tries to detect which, if any changes, actually need to be saved and thus won't version the definition, structure, nor course if they didn't change. """ + partitioned_fields = self.partition_xblock_fields_by_scope(block) definition_locator = getattr(block, "definition_locator", None) if definition_locator is None and not allow_not_found: