Skip to content

Commit

Permalink
feat: mgmt cmd to replace v1 libr refs in courses (openedx#32904)
Browse files Browse the repository at this point in the history
This PR adds a management command that, given a mapping of V1 content libraries to matching V2 content libraries, replaces references to V1 libraries in courses (in library source xblocks) with references to the V2 libraries. It does so by manipulating the mongo document directly.

It also offers some improvements to the management command that copies all V1 libraries into V2 libraries.
  • Loading branch information
connorhaugh committed Aug 11, 2023
1 parent 1b35bf7 commit 4b38b1f
Show file tree
Hide file tree
Showing 4 changed files with 290 additions and 27 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""A Command to Copy or uncopy V1 Content Libraries entires to be stored as v2 content libraries."""

import logging
import csv
from textwrap import dedent

from django.core.management import BaseCommand, CommandError
Expand Down Expand Up @@ -28,15 +29,13 @@ class Command(BaseCommand):
and -- file followed by the path for a list of libraries from a file.
Example usage:
$ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid' --all
$ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid' --all --uncopy
$ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid 'library-v1:edX+DemoX+Better_Library'
$ ./manage.py cms copy_libraries_from_v1_to_v2 'collection_uuid 'library-v1:edX+DemoX+Better_Library' --uncopy
$ ./manage.py cms copy_libraries_from_v1_to_v2
library-v1:edX+DemoX+Demo_Library' 'library-v1:edX+DemoX+Better_Library' -c 'collection_uuid'
$ ./manage.py cms copy_libraries_from_v1_to_v2 --all --uncopy
$ ./manage.py cms copy_libraries_from_v1_to_v2 'library-v1:edX+DemoX+Better_Library' --uncopy
$ ./manage.py cms copy_libraries_from_v1_to_v2
'11111111-2111-4111-8111-111111111111'
'./list_of--library-locators- --file
'11111111-2111-4111-8111-111111111111'
'./list_of--library-locators.csv --all
Note:
This Command Also produces an "output file" which contains the mapping of locators and the status of the copy.
Expand All @@ -49,17 +48,18 @@ def add_arguments(self, parser):
"""arguements for command"""

parser.add_argument(
'-collection_uuid',
'-c',
nargs=1,
'collection_uuid',
type=str,
help='the uuid for the collection to create the content library in.'
)
parser.add_argument(
'library_ids',
nargs='*',
help='a space-seperated list of v1 library ids to copy'
'output_csv',
type=str,
nargs='?',
default=None,
help='a file path to write the tasks output to. Without this the result is simply logged.'
)

parser.add_argument(
'--all',
action='store_true',
Expand All @@ -72,12 +72,11 @@ def add_arguments(self, parser):
dest='uncopy',
help='Delete libraries specified'
)

parser.add_argument(
'output_csv',
nargs='?',
default=None,
help='a file path to write the tasks output to. Without this the result is simply logged.'
'library_ids',
nargs='*',
default=[],
help='a space-seperated list of v1 library ids to copy'
)

def _parse_library_key(self, raw_value):
Expand All @@ -90,10 +89,6 @@ def _parse_library_key(self, raw_value):

def handle(self, *args, **options): # lint-amnesty, pylint: disable=unused-argument
"""Parse args and generate tasks for copying content."""
print(options)

if (not options['library_ids'] and not options['all']) or (options['library_ids'] and options['all']):
raise CommandError("copy_libraries_from_v1_to_v2 requires one or more <library_id>s or the --all flag.")

if (not options['library_ids'] and not options['all']) or (options['library_ids'] and options['all']):
raise CommandError("copy_libraries_from_v1_to_v2 requires one or more <library_id>s or the --all flag.")
Expand All @@ -110,16 +105,17 @@ def handle(self, *args, **options): # lint-amnesty, pylint: disable=unused-argu
v1_library_keys = list(map(self._parse_library_key, options['library_ids']))

create_library_task_group = group([
delete_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid'][0])
delete_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid'])
if options['uncopy']
else create_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid'][0])
else create_v2_library_from_v1_library.s(str(v1_library_key), options['collection_uuid'])
for v1_library_key in v1_library_keys
])

group_result = create_library_task_group.apply_async().get()
if options['output_csv']:
with open(options['output_csv'][0], 'w', encoding='utf-8', newline='') as output_writer:
output_writer.writerow("v1_library_id", "v2_library_id", "status", "error_msg")
with open(options['output_csv'], 'w', encoding='utf-8', newline='') as file:
output_writer = csv.writer(file)
output_writer.writerow(["v1_library_id", "v2_library_id", "status", "error_msg"])
for result in group_result:
output_writer.write(result.keys())
output_writer.writerow(result.values())
log.info(group_result)
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
"""
A Command which, given a mapping of V1 to V2 Libraries,
edits all xblocks in courses which refer to the v1 library to point to the v2 library.
"""

import logging
import csv

from django.core.management import BaseCommand, CommandError
from celery import group

from openedx.core.djangoapps.content.course_overviews.models import CourseOverview
from cms.djangoapps.contentstore.tasks import (
replace_all_library_source_blocks_ids_for_course,
validate_all_library_source_blocks_ids_for_course,
undo_all_library_source_blocks_ids_for_course
)

log = logging.getLogger(__name__)


class Command(BaseCommand):
    """
    Replace, validate, or undo v1 -> v2 library references in the library source blocks of all courses.

    The command reads a CSV file whose first column is a v1 library id and whose
    second column is the matching v2 library id, then queues one Celery task per
    course returned by ``CourseOverview.get_all_courses()``.

    Example usage:
    $ ./manage.py cms replace_v1_lib_refs_with_v2_in_courses '/path/to/library_mappings.csv'
    $ ./manage.py cms replace_v1_lib_refs_with_v2_in_courses '/path/to/library_mappings.csv' --validate
    $ ./manage.py cms replace_v1_lib_refs_with_v2_in_courses '/path/to/library_mappings.csv' --undo
    """

    def add_arguments(self, parser):
        """Register the mapping-file path and the ``--validate`` / ``--undo`` mode flags."""
        parser.add_argument('file_path', type=str, help='Path to the CSV file.')
        parser.add_argument('--validate', action='store_true', help='Validate previous runs of the command')
        parser.add_argument('--undo', action='store_true', help='Undo previous runs of the command')

    def _run_task_for_all_courses(self, task, v1_to_v2_lib_map):
        """
        Queue ``task`` once per course and wait for the whole group to finish.

        Returns a ``(successful_results, failure_count)`` tuple. Failed tasks are
        logged and counted; they never abort the remaining results.
        """
        courses = CourseOverview.get_all_courses()

        # Use Celery to distribute the workload
        # NOTE(review): the course objects themselves are passed as task arguments, so this
        # presumably relies on a task serializer that can handle them -- confirm.
        tasks = group(task.s(course, v1_to_v2_lib_map) for course in courses)
        results = tasks.apply_async()

        successful_results = []
        failure_count = 0
        # propagate=False makes a failed task come back as its exception instance;
        # with the default (True) the first failure is re-raised here and the
        # isinstance check below could never be reached.
        for result in results.get(propagate=False):
            if isinstance(result, Exception):
                log.error("Task failed with error: %s", str(result))
                failure_count += 1
                continue
            successful_results.append(result)
        return successful_results, failure_count

    def replace_all_library_source_blocks_ids(self, v1_to_v2_lib_map):
        """A method to replace 'source_library_id' in all relevant blocks."""
        self._run_task_for_all_courses(replace_all_library_source_blocks_ids_for_course, v1_to_v2_lib_map)
        log.info(
            "Completed replacing all v1 library source ids with v2 library source ids"
        )

    def validate(self, v1_to_v2_lib_map):
        """Validate that replace_all_library_source_blocks_ids was successful."""
        results, failure_count = self._run_task_for_all_courses(
            validate_all_library_source_blocks_ids_for_course,
            v1_to_v2_lib_map,
        )

        # Each successful task returns the source library ids it visited.
        validation = set()
        for visited_ids in results:
            validation.update(visited_ids)

        # A failed task means a course could not be validated, so it must fail the run
        # even when every id collected from the other courses is mapped.
        if failure_count == 0 and validation.issubset(v1_to_v2_lib_map.values()):
            log.info("Validation: All values in the input map are present in courses.")
        else:
            log.error(
                "Validation Failed: There are unmapped v1 libraries."
            )

    def undo(self, v1_to_v2_lib_map):
        """Undo the changes made by replace_all_library_source_blocks_ids."""
        self._run_task_for_all_courses(undo_all_library_source_blocks_ids_for_course, v1_to_v2_lib_map)
        log.info("Completed replacing all v2 library source ids with v1 library source ids. Undo Complete")

    def _load_library_map(self, file_path):
        """
        Read the v1 -> v2 mapping from the CSV file at ``file_path``.

        Rows with fewer than two columns are ignored; surrounding whitespace is
        stripped from both ids. Raises CommandError if the file is not a ``.csv``,
        cannot be read, or yields no mappings.
        """
        # Check the extension before touching the file system.
        if not file_path.endswith('.csv'):
            raise CommandError('Invalid file format. Only CSV files are supported.')

        v1_to_v2_lib_map = {}
        try:
            # newline='' is what the csv module expects for files it parses.
            with open(file_path, 'r', encoding='utf-8', newline='') as csvfile:
                for row in csv.reader(csvfile):
                    if len(row) >= 2:
                        v1_to_v2_lib_map[row[0].strip()] = row[1].strip()
        except FileNotFoundError as exc:
            raise CommandError(f"File not found at '{file_path}'.") from exc
        except (OSError, UnicodeDecodeError, csv.Error) as exc:
            raise CommandError(f"An error occurred: {exc}") from exc

        if not v1_to_v2_lib_map:
            raise CommandError(f"No library mappings found in '{file_path}'.")

        log.info("Imported %s library mappings from '%s'.", len(v1_to_v2_lib_map), file_path)
        return v1_to_v2_lib_map

    def handle(self, *args, **kwargs):
        """
        Parse arguments and begin command.

        Runs exactly one mode: ``--validate``, ``--undo``, or (with neither flag)
        the replace itself. Raises CommandError for conflicting flags or an
        unusable mapping file, so no task is queued with an empty mapping.
        """
        if kwargs['validate'] and kwargs['undo']:
            raise CommandError('--validate and --undo cannot be used together.')

        v1_to_v2_lib_map = self._load_library_map(kwargs['file_path'])

        if kwargs['validate']:
            self.validate(v1_to_v2_lib_map)
        elif kwargs['undo']:
            self.undo(v1_to_v2_lib_map)
        else:
            self.replace_all_library_source_blocks_ids(v1_to_v2_lib_map)
141 changes: 141 additions & 0 deletions cms/djangoapps/contentstore/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,8 @@ def _create_copy_content_task(v2_library_key, v1_library_key):
def _create_metadata(v1_library_key, collection_uuid):
"""instansiate an index for the V2 lib in the collection"""

print(collection_uuid)

store = modulestore()
v1_library = store.get_library(v1_library_key)
collection = get_collection(collection_uuid).uuid
Expand Down Expand Up @@ -1000,3 +1002,142 @@ def delete_v1_library(v1_library_key_string):
"status": "SUCCESS",
"msg": "SUCCESS"
}


@shared_task(time_limit=30)
@set_code_owner_attribute
def validate_all_library_source_blocks_ids_for_course(course, v1_to_v2_lib_map):
    """Check that every library source block in a course refers to a mapped v2 library.

    Queries the draft and published branches of the course for blocks that have a
    ``source_library_id`` setting and compares each id against the values of
    ``v1_to_v2_lib_map``. No block is modified.

    Arguments:
        course: object whose ``id`` is the key of the course to inspect.
        v1_to_v2_lib_map (dict): v1 library id -> v2 library id.

    Returns:
        list: the ``source_library_id`` of every block visited, in visit order
        (an id appears once per block that uses it).

    Raises:
        Exception: at the first block whose ``source_library_id`` is not one of
        the map's values.
    """
    # Build the lookup once instead of re-scanning dict.values() for every block.
    v2_library_ids = set(v1_to_v2_lib_map.values())

    store = modulestore()
    with store.bulk_operations(course.id):
        visited = []
        for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]:
            blocks = store.get_items(
                course.id.for_branch(branch),
                settings={'source_library_id': {'$exists': True}}
            )
            for xblock in blocks:
                if xblock.source_library_id not in v2_library_ids:
                    # lint-amnesty, pylint: disable=broad-except
                    raise Exception(
                        f'{xblock.source_library_id} in {course.id} is not found in mapping. Validation failed'
                    )
                visited.append(xblock.source_library_id)
    # return success
    return visited


@shared_task(time_limit=30)
@set_code_owner_attribute
def replace_all_library_source_blocks_ids_for_course(course, v1_to_v2_lib_map):  # lint-amnesty, pylint: disable=useless-return
    """Replace v1 ``source_library_id`` values in a course with their v2 counterparts.

    Queries the draft and published branches of the course for blocks that have a
    ``source_library_id`` setting. For each draft block whose id is a key of
    ``v1_to_v2_lib_map``, the mapped value is written to the draft block and, when
    a published block exists at the same location, to that block as well.
    This will trigger a publish on the course for every published library source block.

    Arguments:
        course: object whose ``id`` is the key of the course to update.
        v1_to_v2_lib_map (dict): v1 library id -> v2 library id.

    Returns:
        None. Blocks whose id is not in the map are logged and skipped.
    """
    store = modulestore()
    with store.bulk_operations(course.id):
        draft_blocks, published_blocks = [
            store.get_items(
                course.id.for_branch(branch),
                settings={'source_library_id': {'$exists': True}}
            )
            for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]
        ]

        published_dict = {block.location: block for block in published_blocks}

        for draft_library_source_block in draft_blocks:
            current_source_id = draft_library_source_block.source_library_id
            try:
                new_source_id = str(v1_to_v2_lib_map[current_source_id])
            except KeyError:
                # skip invalid keys
                LOGGER.error(
                    'Key %s not found in mapping. Skipping block for course %s',
                    str(current_source_id),
                    str(course.id)
                )
                continue

            # The published branch should be updated as well as the draft branch.
            # This way, if authors "discard changes," they won't be reverted back to the V1 lib.
            # However, we also don't want to publish the draft branch.
            # A plain lookup (instead of try/except KeyError around the writes) keeps a
            # KeyError raised inside the store from being mistaken for "no published block".
            published_block = published_dict.get(draft_library_source_block.location)
            if published_block is None:
                # Warn, but just update the draft block if no published block for draft block.
                LOGGER.warning(
                    'No matching published block for draft block %s',
                    str(draft_library_source_block.location)
                )
            else:
                # Write the new id to the published version first and publish it.
                published_block.source_library_id = new_source_id
                store.update_item(published_block, None)
                store.publish(published_block.location, None)

            draft_library_source_block.source_library_id = new_source_id
            store.update_item(draft_library_source_block, None)
    # return success
    return


@shared_task(time_limit=30)
@set_code_owner_attribute
def undo_all_library_source_blocks_ids_for_course(course, v1_to_v2_lib_map):  # lint-amnesty, pylint: disable=useless-return
    """Replace v2 ``source_library_id`` values in a course with their original v1 ids.

    Queries the draft and published branches of the course for blocks that have a
    ``source_library_id`` setting and rewrites each id using the inverse of
    ``v1_to_v2_lib_map``. This exists to undo changes made previously by
    ``replace_all_library_source_blocks_ids_for_course``.

    Arguments:
        course: object whose ``id`` is the key of the course to update.
        v1_to_v2_lib_map (dict): v1 library id -> v2 library id (the same map
            that was used for the replace; it is inverted here).

    Returns:
        None. Blocks whose id is not a value of the map are logged and skipped.
    """

    # If several v1 ids map to the same v2 id, the last one in the map wins here.
    v2_to_v1_lib_map = {v: k for k, v in v1_to_v2_lib_map.items()}

    store = modulestore()
    # NOTE(review): unlike the replace task, this one is not wrapped in
    # store.bulk_operations(course.id) -- confirm whether that is intentional.
    draft_blocks, published_blocks = [
        store.get_items(
            course.id.for_branch(branch),
            settings={'source_library_id': {'$exists': True}}
        )
        for branch in [ModuleStoreEnum.BranchName.draft, ModuleStoreEnum.BranchName.published]
    ]

    published_dict = {block.location: block for block in published_blocks}

    for draft_library_source_block in draft_blocks:
        current_source_id = draft_library_source_block.source_library_id
        try:
            new_source_id = str(v2_to_v1_lib_map[current_source_id])
        except KeyError:
            # skip invalid keys
            LOGGER.error(
                'Key %s not found in mapping. Skipping block for course %s',
                str(current_source_id),
                str(course.id)
            )
            continue

        # The published branch should be reverted as well as the draft branch.
        # This way, if authors "discard changes," they won't be switched back to the V2 lib.
        # However, we also don't want to publish the draft branch.
        # A plain lookup (instead of try/except KeyError around the writes) keeps a
        # KeyError raised inside the store from being mistaken for "no published block".
        published_block = published_dict.get(draft_library_source_block.location)
        if published_block is None:
            # Warn, but just update the draft block if no published block for draft block.
            LOGGER.warning(
                'No matching published block for draft block %s',
                str(draft_library_source_block.location)
            )
        else:
            # Write the restored id to the published version first and publish it.
            published_block.source_library_id = new_source_id
            store.update_item(published_block, None)
            store.publish(published_block.location, None)

        draft_library_source_block.source_library_id = new_source_id
        store.update_item(draft_library_source_block, None)
    # return success
    return
1 change: 1 addition & 0 deletions xmodule/modulestore/split_mongo/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -1951,6 +1951,7 @@ def update_item(self, block, user_id, allow_not_found=False, force=False, **kwar
The implementation tries to detect which, if any changes, actually need to be saved and thus won't version
the definition, structure, nor course if they didn't change.
"""

partitioned_fields = self.partition_xblock_fields_by_scope(block)
definition_locator = getattr(block, "definition_locator", None)
if definition_locator is None and not allow_not_found:
Expand Down

0 comments on commit 4b38b1f

Please sign in to comment.