Skip to content
This repository has been archived by the owner on Apr 3, 2024. It is now read-only.

Commit

Permalink
Merge pull request #63 from openedx/jill/retire-user
Browse files Browse the repository at this point in the history
Adds user retirement sink
  • Loading branch information
Ian2012 authored Dec 7, 2023
2 parents 12bbe66 + 2e41ccd commit bc68887
Show file tree
Hide file tree
Showing 12 changed files with 253 additions and 8 deletions.
2 changes: 1 addition & 1 deletion event_sink_clickhouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
A sink for Open edX events to send them to ClickHouse.
"""

__version__ = "0.4.0"
__version__ = "0.5.0"
14 changes: 14 additions & 0 deletions event_sink_clickhouse/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,20 @@ class Meta:
]


class UserRetirementSerializer(BaseSinkSerializer, serializers.ModelSerializer):
    """
    Serializer used when a user is retired.

    ``Meta.fields`` lists a single field, ``user_id``, which is read from the
    model's ``id`` attribute through a ``CharField``.
    """

    # Expose the model primary key under the name ``user_id``.
    user_id = serializers.CharField(source="id")

    class Meta:
        """Bind the serializer to the model registered under the ``auth_user`` key."""

        fields = ["user_id"]
        model = get_model("auth_user")


class CourseOverviewSerializer(BaseSinkSerializer, serializers.ModelSerializer):
"""Serializer for course overview events."""

Expand Down
9 changes: 9 additions & 0 deletions event_sink_clickhouse/settings/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,16 @@ def plugin_settings(settings):
"timeout_secs": 5,
}

settings.EVENT_SINK_CLICKHOUSE_PII_MODELS = [
"user_profile",
"external_id",
]

settings.EVENT_SINK_CLICKHOUSE_MODEL_CONFIG = {
"auth_user": {
"module": "django.contrib.auth.models",
"model": "User",
},
"user_profile": {
"module": "common.djangoapps.student.models",
"model": "UserProfile",
Expand Down
4 changes: 4 additions & 0 deletions event_sink_clickhouse/settings/production.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ def plugin_settings(settings):
"EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG",
settings.EVENT_SINK_CLICKHOUSE_BACKEND_CONFIG,
)
settings.EVENT_SINK_CLICKHOUSE_PII_MODELS = settings.ENV_TOKENS.get(
"EVENT_SINK_CLICKHOUSE_PII_MODELS",
settings.EVENT_SINK_CLICKHOUSE_PII_MODELS,
)
31 changes: 28 additions & 3 deletions event_sink_clickhouse/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,18 @@
Signal handler functions, mapped to specific signals in apps.py.
"""
from django.db.models.signals import post_save
from django.dispatch import receiver
from django.dispatch import Signal, receiver

from event_sink_clickhouse.sinks.external_id_sink import ExternalIDSInk
from event_sink_clickhouse.sinks.external_id_sink import ExternalIdSink
from event_sink_clickhouse.sinks.user_retire import UserRetirementSink
from event_sink_clickhouse.utils import get_model

try:
from openedx.core.djangoapps.user_api.accounts.signals import USER_RETIRE_LMS_MISC
except ImportError:
# Tests don't have the platform installed
USER_RETIRE_LMS_MISC = Signal()


def receive_course_publish( # pylint: disable=unused-argument # pragma: no cover
sender, course_key, **kwargs
Expand Down Expand Up @@ -43,9 +50,27 @@ def on_externalid_saved( # pylint: disable=unused-argument # pragma: no cover
# import here, because signal is registered at startup, but items in tasks are not yet able to be loaded
from event_sink_clickhouse.tasks import dump_data_to_clickhouse # pylint: disable=import-outside-toplevel

sink = ExternalIDSInk(None, None)
sink = ExternalIdSink(None, None)
dump_data_to_clickhouse.delay(
sink_module=sink.__module__,
sink_name=sink.__class__.__name__,
object_id=str(instance.id),
)


@receiver(USER_RETIRE_LMS_MISC)
def on_user_retirement(  # pylint: disable=unused-argument  # pragma: no cover
    sender, user, **kwargs
):
    """
    Handle the ``USER_RETIRE_LMS_MISC`` signal.

    Queues ``dump_data_to_clickhouse`` for the ``UserRetirementSink``, passing the
    retiring user's id (as a string) as the object to process.
    """
    # Deferred import: this handler is registered at startup, before the
    # contents of the tasks module can be loaded.
    from event_sink_clickhouse.tasks import dump_data_to_clickhouse  # pylint: disable=import-outside-toplevel

    retirement_sink = UserRetirementSink(None, None)
    # The task receives the sink's module and class name rather than the
    # instance itself.
    task_kwargs = {
        "sink_module": retirement_sink.__module__,
        "sink_name": retirement_sink.__class__.__name__,
        "object_id": str(user.id),
    }
    dump_data_to_clickhouse.delay(**task_kwargs)
2 changes: 1 addition & 1 deletion event_sink_clickhouse/sinks/external_id_sink.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from event_sink_clickhouse.sinks.base_sink import ModelBaseSink


class ExternalIDSInk(ModelBaseSink): # pylint: disable=abstract-method
class ExternalIdSink(ModelBaseSink): # pylint: disable=abstract-method
"""
Sink for user external ID serializer
"""
Expand Down
52 changes: 52 additions & 0 deletions event_sink_clickhouse/sinks/user_retire.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""User retirement sink"""
import requests
from django.conf import settings

from event_sink_clickhouse.serializers import UserRetirementSerializer
from event_sink_clickhouse.sinks.base_sink import ModelBaseSink


class UserRetirementSink(ModelBaseSink):  # pylint: disable=abstract-method
    """
    Sink for user retirement events.

    Unlike the other sinks, this one does not insert rows: ``send_item`` issues
    DELETE mutations against every table named in
    ``settings.EVENT_SINK_CLICKHOUSE_PII_MODELS``.
    """

    model = "auth_user"
    unique_key = "id"
    clickhouse_table_name = (
        "dummy"  # uses settings.EVENT_SINK_CLICKHOUSE_PII_MODELS instead
    )
    timestamp_field = "modified"
    name = "User Retirement"
    serializer_class = UserRetirementSerializer

    def send_item(self, serialized_item, many=False):
        """
        Delete the serialized user(s) from the user PII tables in ClickHouse.

        Arguments:
            serialized_item: a serialized user dict containing a ``user_id`` key,
                or an iterable of such dicts when ``many`` is true.
            many: whether ``serialized_item`` holds several users.

        One ``ALTER TABLE ... DELETE`` request is sent per table listed in
        ``settings.EVENT_SINK_CLICKHOUSE_PII_MODELS``. When there are no user ids
        to delete, nothing is sent.
        """
        users = serialized_item if many else [serialized_item]
        user_ids = {str(user["user_id"]) for user in users}
        if not user_ids:
            # Nothing to retire: avoid sending a pointless (and malformed)
            # ``user_id in ()`` mutation to every PII table.
            return

        # De-duplicated ids, sorted as strings so the generated query text is
        # deterministic for a given set of users.
        user_ids_str = ",".join(sorted(user_ids))
        clickhouse_pii_tables = getattr(
            settings, "EVENT_SINK_CLICKHOUSE_PII_MODELS", []
        )

        for table in clickhouse_pii_tables:
            params = {
                "query": f"ALTER TABLE {self.ch_database}.{table} DELETE WHERE user_id in ({user_ids_str})",
            }
            request = requests.Request(
                "POST",
                self.ch_url,
                params=params,
                auth=self.ch_auth,
            )
            self._send_clickhouse_request(
                request,
                expected_insert_rows=0,  # DELETE requests don't return a row count
            )
1 change: 1 addition & 0 deletions requirements/dev.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@

diff-cover # Changeset diff test coverage
edx-i18n-tools # For i18n_tool dummy
black # For formatting
10 changes: 10 additions & 0 deletions requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ billiard==4.2.0
# via
# -r requirements/quality.txt
# celery
black==23.11.0
# via -r requirements/dev.in
build==1.0.3
# via
# -r requirements/pip-tools.txt
Expand Down Expand Up @@ -58,6 +60,7 @@ click==8.1.7
# via
# -r requirements/pip-tools.txt
# -r requirements/quality.txt
# black
# celery
# click-didyoumean
# click-log
Expand Down Expand Up @@ -191,6 +194,8 @@ mccabe==0.7.0
# via
# -r requirements/quality.txt
# pylint
mypy-extensions==1.0.0
# via black
newrelic==9.2.0
# via
# -r requirements/quality.txt
Expand All @@ -200,12 +205,15 @@ packaging==23.2
# -r requirements/ci.txt
# -r requirements/pip-tools.txt
# -r requirements/quality.txt
# black
# build
# pyproject-api
# pytest
# tox
path==16.7.1
# via edx-i18n-tools
pathspec==0.11.2
# via black
pbr==6.0.0
# via
# -r requirements/quality.txt
Expand All @@ -217,6 +225,7 @@ platformdirs==3.11.0
# -c https://raw.githubusercontent.com/edx/edx-lint/master/edx_lint/files/common_constraints.txt
# -r requirements/ci.txt
# -r requirements/quality.txt
# black
# pylint
# tox
# virtualenv
Expand Down Expand Up @@ -345,6 +354,7 @@ tomli==2.0.1
# -r requirements/ci.txt
# -r requirements/pip-tools.txt
# -r requirements/quality.txt
# black
# build
# coverage
# pip-tools
Expand Down
2 changes: 2 additions & 0 deletions test_utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@
"language",
])

# Minimal stand-in for a user object in tests: carries only an ``id`` attribute.
FakeUser = namedtuple("FakeUser", ["id"])


class FakeXBlock:
"""
Expand Down
29 changes: 26 additions & 3 deletions tests/test_signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@

from django.test import TestCase

from event_sink_clickhouse.signals import on_externalid_saved, on_user_profile_updated, receive_course_publish
from event_sink_clickhouse.sinks.external_id_sink import ExternalIDSInk
from event_sink_clickhouse.signals import (
on_externalid_saved,
on_user_profile_updated,
on_user_retirement,
receive_course_publish,
)
from event_sink_clickhouse.sinks.external_id_sink import ExternalIdSink
from event_sink_clickhouse.sinks.user_retire import UserRetirementSink


class SignalHandlersTestCase(TestCase):
Expand Down Expand Up @@ -45,7 +51,24 @@ def test_on_externalid_saved(self, mock_dump_task):
sender = Mock()
on_externalid_saved(sender, instance)

sink = ExternalIDSInk(None, None)
sink = ExternalIdSink(None, None)

mock_dump_task.delay.assert_called_once_with(
sink_module=sink.__module__,
sink_name=sink.__class__.__name__,
object_id=str(instance.id),
)

@patch("event_sink_clickhouse.tasks.dump_data_to_clickhouse")
def test_on_user_retirement(self, mock_dump_task):
"""
Test that on_user_retirement calls dump_data_to_clickhouse
"""
instance = Mock()
sender = Mock()
on_user_retirement(sender, instance)

sink = UserRetirementSink(None, None)

mock_dump_task.delay.assert_called_once_with(
sink_module=sink.__module__,
Expand Down
105 changes: 105 additions & 0 deletions tests/test_user_retire.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
"""
Tests for the user_retire sinks.
"""
import logging
from unittest.mock import patch

import responses
from django.test.utils import override_settings
from responses.registries import OrderedRegistry

from event_sink_clickhouse.sinks.user_retire import UserRetirementSink
from event_sink_clickhouse.tasks import dump_data_to_clickhouse
from test_utils.helpers import FakeUser

log = logging.getLogger(__name__)


@responses.activate(  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
    registry=OrderedRegistry
)
@override_settings(EVENT_SINK_CLICKHOUSE_PII_MODELS=["user_profile", "external_id"])
@patch("event_sink_clickhouse.sinks.user_retire.UserRetirementSink.serialize_item")
@patch("event_sink_clickhouse.sinks.user_retire.UserRetirementSink.is_enabled")
@patch("event_sink_clickhouse.sinks.user_retire.UserRetirementSink.get_model")
def test_retire_user(mock_user_model, mock_is_enabled, mock_serialize_item):
    """
    Retiring one user sends exactly one DELETE per configured PII table.
    """
    # A fake user, wired into the patched model lookup and serializer.
    user = FakeUser(246)
    mock_user_model.return_value.get_from_id.return_value = user
    mock_is_enabled.return_value = True
    mock_serialize_item.return_value = {"user_id": user.id}

    # Query parameters each POST to ClickHouse is expected to carry.
    user_profile_params = {
        "query": f"ALTER TABLE cool_data.user_profile DELETE WHERE user_id in ({user.id})",
    }
    external_id_params = {
        "query": f"ALTER TABLE cool_data.external_id DELETE WHERE user_id in ({user.id})",
    }

    # The registry is ordered, so the mocked responses are registered in the
    # same order as the tables appear in the overridden setting.
    user_profile_delete = responses.post(
        "https://foo.bar/",
        match=[responses.matchers.query_param_matcher(user_profile_params)],
    )
    external_id_delete = responses.post(
        "https://foo.bar/",
        match=[responses.matchers.query_param_matcher(external_id_params)],
    )

    sink = UserRetirementSink(None, None)
    dump_data_to_clickhouse(
        sink_module=sink.__module__,
        sink_name=sink.__class__.__name__,
        object_id=user.id,
    )

    # Every patched hook ran once...
    for patched in (mock_user_model, mock_is_enabled, mock_serialize_item):
        assert patched.call_count == 1
    # ...and each PII table received exactly one DELETE.
    for delete_response in (user_profile_delete, external_id_delete):
        assert delete_response.call_count == 1


@responses.activate(  # pylint: disable=unexpected-keyword-arg,no-value-for-parameter
    registry=OrderedRegistry
)
@override_settings(EVENT_SINK_CLICKHOUSE_PII_MODELS=["user_profile"])
@patch("event_sink_clickhouse.sinks.user_retire.UserRetirementSink.serialize_item")
def test_retire_many_users(mock_serialize_item):
    """
    Retiring several users at once sends a single DELETE covering all of them.
    """
    # A handful of fake users, returned pre-serialized by the patched serializer.
    users = (FakeUser(246), FakeUser(22), FakeUser(91))
    serialized_users = [{"user_id": user.id} for user in users]
    mock_serialize_item.return_value = serialized_users

    # The ids appear in the query sorted as strings, hence 22, 246, 91.
    expected_params = {
        "query": "ALTER TABLE cool_data.user_profile DELETE WHERE user_id in (22,246,91)",
    }
    user_profile_delete = responses.post(
        "https://foo.bar/",
        match=[responses.matchers.query_param_matcher(expected_params)],
    )

    sink = UserRetirementSink(None, log)
    sink.dump(
        item_id=users[0].id,
        many=True,
    )

    assert mock_serialize_item.call_count == 1
    assert user_profile_delete.call_count == 1

0 comments on commit bc68887

Please sign in to comment.