-
Notifications
You must be signed in to change notification settings - Fork 3.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: purge name from certificate records during user retirement (#34799
) [APER-3241] This PR updates the retirement pipeline to purge learners' names from certificate records when their account is being retired. It also introduces a new management command that can be used by Open edX operators to purge the leftover name data (PII data) from the `certificates_generatedcertificate` table. This is designed as a one-time use data fixup, as the retirement functionality should clean this moving forward.
- Loading branch information
1 parent
7709f4b
commit 9fbc6e3
Showing
9 changed files
with
501 additions
and
71 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
66 changes: 66 additions & 0 deletions
66
lms/djangoapps/certificates/management/commands/purge_pii_from_generatedcertificates.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
""" | ||
A management command, designed to be run once by Open edX Operators, to obfuscate learner PII from the | ||
`Certificates_GeneratedCertificate` table that should have been purged during learner retirement. | ||
A fix has been included in the retirement pipeline to properly purge this data during learner retirement. This can be | ||
used to purge PII from accounts that have already been retired. | ||
""" | ||
|
||
import logging | ||
|
||
from django.contrib.auth import get_user_model | ||
from django.core.management.base import BaseCommand | ||
|
||
from lms.djangoapps.certificates.models import GeneratedCertificate | ||
from openedx.core.djangoapps.user_api.api import get_retired_user_ids | ||
|
||
User = get_user_model() | ||
log = logging.getLogger(__name__) | ||
|
||
|
||
class Command(BaseCommand): | ||
""" | ||
This management command performs a bulk update on `GeneratedCertificate` instances. This means that it will not | ||
invoke the custom save() function defined as part of the `GeneratedCertificate` model, and thus will not emit any | ||
Django signals throughout the system after the update occurs. This is desired behavior. We are using this | ||
management command to purge remnant PII, retired elsewhere in the system, that should have already been removed | ||
from the Certificates tables. We don't need updates to propogate to external systems (like the Credentials IDA). | ||
This management command functions by requesting a list of learners' user_ids whom have completed their journey | ||
through the retirement pipeline. The `get_retired_user_ids` utility function is responsible for filtering out any | ||
learners in the PENDING state, as they could still submit a request to cancel their account deletion request (and | ||
we don't want to remove any data that may still be good). | ||
Example usage: | ||
# Dry Run (preview changes): | ||
$ ./manage.py lms purge_pii_from_generatedcertificates --dry-run | ||
# Purge data: | ||
$ ./manage.py lms purge_pii_from_generatedcertificates | ||
""" | ||
|
||
help = """ | ||
Purges learners' full names from the `Certificates_GeneratedCertificate` table if their account has been | ||
successfully retired. | ||
""" | ||
|
||
def add_arguments(self, parser): | ||
parser.add_argument( | ||
"--dry-run", | ||
action="store_true", | ||
help="Shows a preview of what users would be affected by running this management command.", | ||
) | ||
|
||
def handle(self, *args, **options): | ||
retired_user_ids = get_retired_user_ids() | ||
if not options["dry_run"]: | ||
log.warning( | ||
f"Purging `name` from the certificate records of the following users: {retired_user_ids}" | ||
) | ||
GeneratedCertificate.objects.filter(user_id__in=retired_user_ids).update(name="") | ||
else: | ||
log.info( | ||
"DRY RUN: running this management command would purge `name` data from the following users: " | ||
f"{retired_user_ids}" | ||
) |
114 changes: 114 additions & 0 deletions
114
...oapps/certificates/management/commands/tests/test_purge_pii_from_generatedcertificates.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
""" | ||
Tests for the `purge_pii_from_generatedcertificates` management command. | ||
""" | ||
|
||
|
||
from django.core.management import call_command | ||
from testfixtures import LogCapture | ||
|
||
from common.djangoapps.student.tests.factories import UserFactory | ||
from lms.djangoapps.certificates.data import CertificateStatuses | ||
from lms.djangoapps.certificates.models import GeneratedCertificate | ||
from lms.djangoapps.certificates.tests.factories import GeneratedCertificateFactory | ||
from openedx.core.djangoapps.user_api.models import RetirementState | ||
from openedx.core.djangoapps.user_api.tests.factories import ( | ||
RetirementStateFactory, | ||
UserRetirementRequestFactory, | ||
UserRetirementStatusFactory, | ||
) | ||
from xmodule.modulestore.tests.django_utils import ModuleStoreTestCase | ||
from xmodule.modulestore.tests.factories import CourseFactory | ||
|
||
|
||
class PurgePiiFromCertificatesTests(ModuleStoreTestCase): | ||
""" | ||
Tests for the `purge_pii_from_generatedcertificates` management command. | ||
""" | ||
@classmethod | ||
def setUpClass(cls): | ||
""" | ||
The retirement pipeline is not fully enabled by default. In order to properly test the management command, we | ||
must ensure that at least one of the required RetirementState states (`COMPLETE`) exists. | ||
""" | ||
super().setUpClass() | ||
cls.complete = RetirementStateFactory(state_name="COMPLETE") | ||
|
||
@classmethod | ||
def tearDownClass(cls): | ||
# Remove any retirement state objects that we created during this test suite run. We don't want to poison other | ||
# test suites. | ||
RetirementState.objects.all().delete() | ||
super().tearDownClass() | ||
|
||
def setUp(self): | ||
super().setUp() | ||
self.course_run = CourseFactory() | ||
# create an "active" learner that is not associated with any retirement requests, used to verify that the | ||
# management command doesn't purge any info for active users. | ||
self.user_active = UserFactory() | ||
self.user_active_name = "Teysa Karlov" | ||
GeneratedCertificateFactory( | ||
status=CertificateStatuses.downloadable, | ||
course_id=self.course_run.id, | ||
user=self.user_active, | ||
name=self.user_active_name, | ||
grade=1.00, | ||
) | ||
# create a second learner that is associated with a retirement request, used to verify that the management | ||
# command purges info successfully from a GeneratedCertificate instance associated with a retired learner | ||
self.user_retired = UserFactory() | ||
self.user_retired_name = "Nicol Bolas" | ||
GeneratedCertificateFactory( | ||
status=CertificateStatuses.downloadable, | ||
course_id=self.course_run.id, | ||
user=self.user_retired, | ||
name=self.user_retired_name, | ||
grade=0.99, | ||
) | ||
UserRetirementStatusFactory( | ||
user=self.user_retired, | ||
current_state=self.complete, | ||
last_state=self.complete, | ||
) | ||
UserRetirementRequestFactory(user=self.user_retired) | ||
|
||
def test_management_command(self): | ||
""" | ||
Verify the management command purges expected data from a GeneratedCertificate instance if a learner has | ||
successfully had their account retired. | ||
""" | ||
cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active) | ||
assert cert_for_active_user.name == self.user_active_name | ||
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired) | ||
assert cert_for_retired_user.name == self.user_retired_name | ||
|
||
call_command("purge_pii_from_generatedcertificates") | ||
|
||
cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active) | ||
assert cert_for_active_user.name == self.user_active_name | ||
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired) | ||
assert cert_for_retired_user.name == "" | ||
|
||
def test_management_command_dry_run(self): | ||
""" | ||
Verify that the management command does not purge any data when invoked with the `--dry-run` flag | ||
""" | ||
expected_log_msg = ( | ||
"DRY RUN: running this management command would purge `name` data from the following users: " | ||
f"[{self.user_retired.id}]" | ||
) | ||
|
||
cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active) | ||
assert cert_for_active_user.name == self.user_active_name | ||
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired) | ||
assert cert_for_retired_user.name == self.user_retired_name | ||
|
||
with LogCapture() as logger: | ||
call_command("purge_pii_from_generatedcertificates", "--dry-run") | ||
|
||
cert_for_active_user = GeneratedCertificate.objects.get(user_id=self.user_active) | ||
assert cert_for_active_user.name == self.user_active_name | ||
cert_for_retired_user = GeneratedCertificate.objects.get(user_id=self.user_retired) | ||
assert cert_for_retired_user.name == self.user_retired_name | ||
|
||
assert logger.records[0].msg == expected_log_msg |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.