Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: purge name from certificate records during learner retirement #34799

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions lms/djangoapps/certificates/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -953,3 +953,21 @@ def invalidate_certificate(user_id, course_key_or_id, source):
return False

return True


def clear_pii_from_certificate_records_for_user(user):
"""
Utility function to remove PII from certificate records when a learner's account is being retired. Used by the
`AccountRetirementView` in the `user_api` Django app (invoked by the /api/user/v1/accounts/retire endpoint).

The update is performed using a bulk SQL update via the Django ORM. This will not trigger the GeneratedCertificate
model's custom `save()` function, nor fire any Django signals (which is desired at the time of writing). There is
nothing to update in our external systems by this update.

Args:
user (User): The User instance of the learner actively being retired.

Returns:
None
"""
GeneratedCertificate.objects.filter(user=user).update(name="")
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""
A management command, designed to be run once by Open edX Operators, to obfuscate learner PII from the
`Certificates_GeneratedCertificate` table that should have been purged during learner retirement.

A fix has been included in the retirement pipeline to properly purge this data during learner retirement. This can be
used to purge PII from accounts that have already been retired.
"""

import logging

from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand

from lms.djangoapps.certificates.models import GeneratedCertificate
from openedx.core.djangoapps.user_api.api import get_retired_user_ids

User = get_user_model()
log = logging.getLogger(__name__)


class Command(BaseCommand):
"""
This management command performs a bulk update on `GeneratedCertificate` instances. This means that it will not
invoke the custom save() function defined as part of the `GeneratedCertificate` model, and thus will not emit any
Django signals throughout the system after the update occurs. This is desired behavior. We are using this
management command to purge remnant PII, retired elsewhere in the system, that should have already been removed
from the Certificates tables. We don't need updates to propogate to external systems (like the Credentials IDA).

This management command functions by requesting a list of learners' user_ids whom have completed their journey
through the retirement pipeline. The `get_retired_user_ids` utility function is responsible for filtering out any
learners in the PENDING state, as they could still submit a request to cancel their account deletion request (and
we don't want to remove any data that may still be good).

Example usage:

# Dry Run (preview changes):
$ ./manage.py lms purge_pii_from_generatedcertificates --dry-run

# Purge data:
$ ./manage.py lms purge_pii_from_generatedcertificates
"""

help = """
Purges learners' full names from the `Certificates_GeneratedCertificate` table if their account has been
successfully retired.
"""

def add_arguments(self, parser):
parser.add_argument(
"--dry-run",
action="store_true",
help="Shows a preview of what users would be affected by running this management command.",
)

def handle(self, *args, **options):
retired_user_ids = get_retired_user_ids()
if not options["dry_run"]:
log.warning(
f"Purging `name` from the certificate records of the following users: {retired_user_ids}"
)
GeneratedCertificate.objects.filter(user_id__in=retired_user_ids).update(name="")
else:
log.info(
"DRY RUN: running this management command would purge `name` data from the following users: "
f"{retired_user_ids}"
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""
Tests for the `purge_pii_from_generatedcertificates` management command.
"""


from django.core.management import call_command
from testfixtures import LogCapture

from common.djangoapps.student.tests.factories import UserFactory
from lms.djangoapps.certificates.data import CertificateStatuses
from lms.djangoapps.certificates.models import GeneratedCertificate
from lms.djangoapps.certificates.tests.factories import GeneratedCertificateFactory
from openedx.core.djangoapps.user_api.models import RetirementState
from openedx.core.djangoapps.user_api.tests.factories import (
RetirementStateFactory,
UserRetirementRequestFactory,
UserRetirementStatusFactory,
)
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase
from xmodule.modulestore.tests.factories import CourseFactory


class PurgePiiFromCertificatesTests(ModuleStoreTestCase):
"""
Tests for the `purge_pii_from_generatedcertificates` management command.
"""
@classmethod
def setUpClass(cls):
"""
The retirement pipeline is not fully enabled by default. In order to properly test the management command, we
must ensure that at least one of the required RetirementState states (`COMPLETE`) exists.
"""
super().setUpClass()
cls.complete = RetirementStateFactory(state_name="COMPLETE")

@classmethod
def tearDownClass(cls):
# Remove any retirement state objects that we created during this test suite run. We don't want to poison other
# test suites.
RetirementState.objects.all().delete()
super().tearDownClass()

def setUp(self):
super().setUp()
self.course_run = CourseFactory()
# create an "active" learner that is not associated with any retirement requests, used to verify that the
# management command doesn't purge any info for active users.
self.user_active = UserFactory()
justinhynes marked this conversation as resolved.
Show resolved Hide resolved
self.user_active_name = "Teysa Karlov"
GeneratedCertificateFactory(
status=CertificateStatuses.downloadable,
course_id=self.course_run.id,
user=self.user_active,
name=self.user_active_name,
grade=1.00,
)
# create a second learner that is associated with a retirement request, used to verify that the management
# command purges info successfully from a GeneratedCertificate instance associated with a retired learner
self.user_retired = UserFactory()
self.user_retired_name = "Nicol Bolas"
justinhynes marked this conversation as resolved.
Show resolved Hide resolved
GeneratedCertificateFactory(
status=CertificateStatuses.downloadable,
course_id=self.course_run.id,
user=self.user_retired,
name=self.user_retired_name,
grade=0.99,
)
UserRetirementStatusFactory(
user=self.user_retired,
current_state=self.complete,
last_state=self.complete,
)
UserRetirementRequestFactory(user=self.user_retired)

def test_management_command(self):
"""
Verify the management command purges expected data from a GeneratedCertificate instance if a learner has
successfully had their account retired.
"""
cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active)
assert cert_for_active_user.name == self.user_active_name
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == self.user_retired_name

call_command("purge_pii_from_generatedcertificates")

cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active)
assert cert_for_active_user.name == self.user_active_name
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == ""

def test_management_command_dry_run(self):
"""
Verify that the management command does not purge any data when invoked with the `--dry-run` flag
"""
expected_log_msg = (
"DRY RUN: running this management command would purge `name` data from the following users: "
f"[{self.user_retired.id}]"
)

cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active)
assert cert_for_active_user.name == self.user_active_name
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == self.user_retired_name

with LogCapture() as logger:
call_command("purge_pii_from_generatedcertificates", "--dry-run")

cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active)
assert cert_for_active_user.name == self.user_active_name
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired)
assert cert_for_retired_user.name == self.user_retired_name

assert logger.records[0].msg == expected_log_msg
47 changes: 43 additions & 4 deletions lms/djangoapps/certificates/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
can_show_certificate_message,
certificate_status_for_student,
certificate_downloadable_status,
clear_pii_from_certificate_records_for_user,
create_certificate_invalidation_entry,
create_or_update_certificate_allowlist_entry,
display_date_for_certificate,
Expand Down Expand Up @@ -76,6 +77,9 @@
from openedx.core.djangoapps.site_configuration.tests.test_util import with_site_configuration

CAN_GENERATE_METHOD = 'lms.djangoapps.certificates.generation_handler._can_generate_regular_certificate'
BETA_TESTER_METHOD = 'lms.djangoapps.certificates.api.access.is_beta_tester'
CERTS_VIEWABLE_METHOD = 'lms.djangoapps.certificates.api.certificates_viewable_for_course'
PASSED_OR_ALLOWLISTED_METHOD = 'lms.djangoapps.certificates.api._has_passed_or_is_allowlisted'
FEATURES_WITH_CERTS_ENABLED = settings.FEATURES.copy()
FEATURES_WITH_CERTS_ENABLED['CERTIFICATES_HTML_VIEW'] = True

Expand Down Expand Up @@ -1120,10 +1124,6 @@ def test_get_certificate_invalidation_entry_dne(self):
for index, message in enumerate(expected_messages):
assert message in log.records[index].getMessage()

BETA_TESTER_METHOD = 'lms.djangoapps.certificates.api.access.is_beta_tester'
CERTS_VIEWABLE_METHOD = 'lms.djangoapps.certificates.api.certificates_viewable_for_course'
PASSED_OR_ALLOWLISTED_METHOD = 'lms.djangoapps.certificates.api._has_passed_or_is_allowlisted'
justinhynes marked this conversation as resolved.
Show resolved Hide resolved


class MockGeneratedCertificate:
"""
Expand Down Expand Up @@ -1268,3 +1268,42 @@ def test_beta_tester(self):

with patch(BETA_TESTER_METHOD, return_value=True):
assert not can_show_certificate_message(self.course, self.user, grade, certs_enabled)


class CertificatesLearnerRetirementFunctionality(ModuleStoreTestCase):
"""
API tests for utility functions used as part of the learner retirement pipeline to remove PII from certificate
records.
"""
def setUp(self):
super().setUp()
self.user = UserFactory()
self.user_full_name = "Maeby Funke"
self.course1 = CourseOverviewFactory()
self.course2 = CourseOverviewFactory()
GeneratedCertificateFactory(
course_id=self.course1.id,
name=self.user_full_name,
user=self.user,
)
GeneratedCertificateFactory(
course_id=self.course2.id,
name=self.user_full_name,
user=self.user,
)

def test_clear_pii_from_certificate_records(self):
"""
Unit test for the `clear_pii_from_certificate_records` utility function, used to wipe PII from certificate
records when a learner's account is being retired.
"""
cert_course1 = GeneratedCertificate.objects.get(user=self.user, course_id=self.course1.id)
cert_course2 = GeneratedCertificate.objects.get(user=self.user, course_id=self.course2.id)
assert cert_course1.name == self.user_full_name
assert cert_course2.name == self.user_full_name

clear_pii_from_certificate_records_for_user(self.user)
cert_course1 = GeneratedCertificate.objects.get(user=self.user, course_id=self.course1.id)
cert_course2 = GeneratedCertificate.objects.get(user=self.user, course_id=self.course2.id)
assert cert_course1.name == ""
assert cert_course2.name == ""
Loading
Loading