Skip to content

Commit

Permalink
Improve pseudonymization
Browse files Browse the repository at this point in the history
- Added studentID to be pseudonymized
- Fixed hashing issues
  • Loading branch information
mikaelGusse committed Mar 7, 2024
1 parent 8bc452f commit 93acbc0
Show file tree
Hide file tree
Showing 12 changed files with 104 additions and 60 deletions.
6 changes: 3 additions & 3 deletions course/staff_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
def format_group(group: StudentGroup, pseudonymized: bool):
    """Pseudonymize the members of ``group`` in place when requested.

    Args:
        group: The student group whose members should be masked.
        pseudonymized: When false the group is returned untouched.

    Returns:
        The same ``group`` object that was passed in.
    """
    if pseudonymized:
        for member in group.members.all():
            # Call format_user exactly once per member, handing over the
            # profile as well so that the student id is masked together with
            # the user's name fields.
            format_user(member.user, True, member)
    return group


Expand Down Expand Up @@ -71,7 +71,7 @@ def _get_students_with_tags(self) -> List[Dict[str, Any]]:
participants = ci.all_students.prefetch_tags(ci)
data = []
for participant in participants:
format_user(participant.user, self.pseudonymize)
format_user(participant.user, self.pseudonymize, participant)
user_id = participant.user.id
user_tags = CachedStudent(ci, participant.user).data
user_tags_html = ' '.join(tags[slug].html_label for slug in user_tags['tag_slugs'] if slug in tags)
Expand Down Expand Up @@ -124,7 +124,7 @@ def get_resource_objects(self):
)
else:
group = StudentGroup(course_instance=self.instance)
self.group = format_group(group, self.pseudonymize)
self.group = group

def get_form_kwargs(self):
kwargs = super().get_form_kwargs()
Expand Down
13 changes: 13 additions & 0 deletions course/templates/course/_course_menu.html
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,19 @@ <h4>{% translate "COURSE_STAFF" %}</h4>
{% translate "SUBMISSION_DEVIATIONS" %}
</a>
</li>

<li class="menu-pseudonymize">
<a href="{% url 'toggle-pseudonymization' %}">
{% if pseudonymize %}
<span class="glyphicon glyphicon-eye-open" aria-hidden="true"></span>
{% translate "UNPSEUDONYMIZE" %}
{% else %}
<span class="glyphicon glyphicon-eye-close" aria-hidden="true"></span>
{% translate "PSEUDONYMIZE" %}
{% endif %}
</a>
</li>

{% endif %}

{% for group in course_menu.staff_link_groups %}
Expand Down
2 changes: 1 addition & 1 deletion deviations/viewbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ def get_deviation_groups(
lambda obj: (obj.submitter, obj.exercise.course_module),
)
for (submitter, module), deviations_iter in deviation_groups:
format_user(submitter.user, pseudonymize)
format_user(submitter.user, pseudonymize, submitter)
deviations = list(deviations_iter)
can_group = True
show_granter = True
Expand Down
17 changes: 11 additions & 6 deletions exercise/staff_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@
def format_submission(submission: Submission, pseudonymized: bool):
    """Pseudonymize every submitter of ``submission`` in place when requested.

    Args:
        submission: The submission whose submitters should be masked.
        pseudonymized: When false the submission is returned untouched.

    Returns:
        The same ``submission`` object that was passed in.
    """
    if pseudonymized:
        # NOTE(review): only the instances yielded by this ``.all()`` call are
        # modified; presumably callers rely on a prefetch cache so that later
        # accesses see the same objects -- confirm.
        for submitter in submission.submitters.all():
            # One call per submitter; the profile is passed along so that the
            # student id is masked together with the user's name fields.
            format_user(submitter.user, True, submitter)
    return submission


Expand All @@ -76,6 +77,7 @@ def get_common_objects(self) -> None:
)
for submission in qs:
format_submission(submission, self.pseudonymize)
print(submission.submitters)
self.all = self.request.GET.get('all', None)
self.all_url = self.exercise.get_submission_list_url() + "?all=yes"
self.submissions = qs if self.all else qs[:self.default_limit]
Expand Down Expand Up @@ -127,7 +129,8 @@ def get_common_objects(self) -> None:
)
if self.pseudonymize:
for profile in profiles.values():
format_user(profile.user, pseudonymized=True)
pseudo = True
format_user(profile.user, pseudo, profile)
# Add UserProfile instances to the dicts in submitter_summaries, so we can
# use the 'profiles' template tag.
for submitter_summary in submitter_summaries:
Expand Down Expand Up @@ -184,7 +187,7 @@ def get_common_objects(self) -> None:
self.not_best = False
self.not_last = False
for submission in self.submissions:
format_submission(submission, self.pseudonymize)
format_submission(self.submission, self.pseudonymize)
if submission.id != self.submission.id:
if submission.force_exercise_points:
self.not_final = True
Expand Down Expand Up @@ -407,8 +410,9 @@ def get_common_objects(self):
super().get_common_objects()
self.tags = [USERTAG_INTERNAL, USERTAG_EXTERNAL]
self.tags.extend(self.instance.usertags.all())
self.pseudonymize = self.request.session.get("pseudonymize", False)
self.note(
'tags',
'tags', 'pseudonymize',
)


Expand All @@ -421,8 +425,9 @@ def get_common_objects(self):
self.tags = list(self.instance.usertags.all())
self.internal_user_label = settings_text('INTERNAL_USER_LABEL')
self.external_user_label = settings_text('EXTERNAL_USER_LABEL')
self.pseudonymize = self.request.session.get("pseudonymize", False)
self.note(
'tags', 'internal_user_label', 'external_user_label',
'tags', 'internal_user_label', 'external_user_label', 'pseudonymize',
)


Expand All @@ -442,7 +447,7 @@ def get_resource_objects(self):
User,
id=self.kwargs[self.user_kw],
)
self.student = format_user(student, self.pseudonymize)
self.student = format_user(student, self.pseudonymize, student.userprofile)
self.note('student')

def get_common_objects(self):
Expand Down
66 changes: 36 additions & 30 deletions exercise/templates/exercise/staff/analytics.html
Original file line number Diff line number Diff line change
Expand Up @@ -113,35 +113,41 @@
<script src="{% static 'js/llama.min.js' %}"></script>
<link rel="stylesheet" href="{% static 'css/d3Stream.min.css' %}" />
<link rel="stylesheet" href="{% static 'css/llama.min.css' %}" />
<script>
$(function () {
new LlamaClient({

apiUrl: function (filter, download) {
var url = '{% url "api:course-aggregatedata-list" version=2 course_id=instance.id %}';
var a = '?';
if (filter && filter != '#all') {
url += '?filter=' + filter.substr(1);
a = '&';
}
if (download) {
url += a + 'format=csv';
}
return url;
},

userUrl: function (uid) {
var url = '{% url "user-results" course_slug=course.url instance_slug=instance.url user_id=0 %}';
return url.substr(0, url.length - 1) + uid;
},

progressLabels: [
'{% translate "NOT_SUBMITTED" %}',
'{% translate "PROGRESS_LABEL_<90%_POINTS" %}',
'{% translate "PROGRESS_LABEL_>=90%_POINTS" %}'
],

{% if not pseudonymize %}
<script>
$(function () {
new LlamaClient({

apiUrl: function (filter, download) {
var url = '{% url "api:course-aggregatedata-list" version=2 course_id=instance.id %}';
var a = '?';
if (filter && filter != '#all') {
url += '?filter=' + filter.substr(1);
a = '&';
}
if (download) {
url += a + 'format=csv';
}
return url;
},

userUrl: function (uid) {
var url = '{% url "user-results" course_slug=course.url instance_slug=instance.url user_id=0 %}';
return url.substr(0, url.length - 1) + uid;
},

progressLabels: [
'{% translate "NOT_SUBMITTED" %}',
'{% translate "PROGRESS_LABEL_<90%_POINTS" %}',
'{% translate "PROGRESS_LABEL_>=90%_POINTS" %}'
],

});
});
});
</script>
</script>
{% else %}
<div style="position: absolute; top: 50%; left: 50%; transform: translate(-50%); font-size: 3em;">
This view is not available when pseudonymization is enabled. Disable it to see visualizations.
</div>
{% endif %}
{% endblock %}
22 changes: 14 additions & 8 deletions exercise/templates/exercise/staff/results.html
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,20 @@
results_staff.js is the main script for this page. -->
<!--<link rel="stylesheet" href="{% static 'exercise/css/loading_animation.css' %}" />-->
<link rel="stylesheet" href="{% static 'exercise/css/results_staff.css' %}" />
<script
src="{% static 'exercise/results_staff.js' %}"
data-exercises-url="{% url 'api:course-exercises-list' version=2 course_id=instance.id %}"
data-usertags-url="{% url 'api:course-usertags-list' version=2 course_id=instance.id %}"
data-points-url="{% url 'api:course-resultsdata-list' version=2 course_id=instance.id %}"
data-points-best-url="{% url 'api:course-resultsdata-best-list' version=2 course_id=instance.id %}"
defer>
</script>
{% if not pseudonymize %}
<script
src="{% static 'exercise/results_staff.js' %}"
data-exercises-url="{% url 'api:course-exercises-list' version=2 course_id=instance.id %}"
data-usertags-url="{% url 'api:course-usertags-list' version=2 course_id=instance.id %}"
data-points-url="{% url 'api:course-resultsdata-list' version=2 course_id=instance.id %}"
data-points-best-url="{% url 'api:course-resultsdata-best-list' version=2 course_id=instance.id %}"
defer>
</script>
{% else %}
<div style="position: absolute; top: 50%; left: 50%; transform: translate(-50%); font-size: 3em;">
This view is not available when pseudonymization is enabled. Disable it to see all results.
</div>
{% endif %}

<!--Colortags-->
<script src="{% static 'django_colortag.js' %}"></script>
Expand Down
2 changes: 1 addition & 1 deletion exercise/viewbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def get_summary_user(self) -> Optional[User]:

@cached_property
def submission_entry(self) -> int:
    """Return the element of ``self.submissions`` matching ``self.submission``.

    The match is made on ``id``. ``StopIteration`` propagates from ``next``
    when no element of ``self.submissions`` has that id.
    """
    # NOTE(review): the ``-> int`` annotation does not match what is returned
    # (an element of ``self.submissions``, not a number) -- confirm the
    # intended type and adjust the annotation.
    return next(s for s in self.submissions if s.id == self.submission.id)

@cached_property
def index(self) -> int:
Expand Down
1 change: 1 addition & 0 deletions external_services/templatetags/external_services.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import string

from django import template

from lib.errors import TagUsageError
Expand Down
8 changes: 8 additions & 0 deletions locale/en/LC_MESSAGES/django.po
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,14 @@ msgstr "Deadline deviations"
msgid "SUBMISSION_DEVIATIONS"
msgstr "Submission deviations"

#: course/templates/course/_course_menu.html
msgid "UNPSEUDONYMIZE"
msgstr "Un-pseudonymize"

#: course/templates/course/_course_menu.html
msgid "PSEUDONYMIZE"
msgstr "Pseudonymize"

#: course/templates/course/_enroll_form.html
msgid "ENROLL_THROUGH_SIS"
msgstr ""
Expand Down
9 changes: 9 additions & 0 deletions locale/fi/LC_MESSAGES/django.po
Original file line number Diff line number Diff line change
Expand Up @@ -1023,6 +1023,15 @@ msgstr "Määräaikojen poikkeamat"
msgid "SUBMISSION_DEVIATIONS"
msgstr "Palautuskertojen poikkeamat"

#: course/templates/course/_course_menu.html
msgid "UNPSEUDONYMIZE"
msgstr "Pseudonymisointi pois päältä"

#: course/templates/course/_course_menu.html
msgid "PSEUDONYMIZE"
msgstr "Pseudonymisointi päälle"


#: course/templates/course/_enroll_form.html
msgid "ENROLL_THROUGH_SIS"
msgstr ""
Expand Down
3 changes: 0 additions & 3 deletions templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,6 @@ <h4>{% translate "SITE" %}</h4>
<li><a href="{% url 'accessibility_statement' %}">{% translate "ACCESSIBILITY_STATEMENT" %}</a></li>
<li><a href="{% url 'support_channels' %}">{% translate "SUPPORT" %}</a></li>
<li><a href="https://link.webropol.com/s/aplus-feedback" target="_blank">{% translate "FEEDBACK" %}</a></li>
{% if is_course_staff %}
<li><a href="{% url 'toggle-pseudonymization' %}">{% if pseudonymize %} Unpseudonymize {% else %} Pseudonymize {% endif %} personal data</a></li>
{% endif %}
<li><p class="navbar-text">{% brand_name %} {{ APLUS_VERSION }}</p></li>
</ul>
</div>
Expand Down
15 changes: 7 additions & 8 deletions userprofile/pseudonymize.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
from faker import Faker
import hashlib
import random

from django.contrib.auth.models import User

from userprofile.models import UserProfile

fake = Faker()
fake = Faker(['fi_FI', 'en_US'], use_weighting=False)
num_fakes = 500

fakes = {
Expand All @@ -16,19 +15,19 @@
'username': [fake.unique.user_name() for _ in range(num_fakes)],
}


def pseudonymize(key: str, data: str):
    """Map ``data`` to a stable fake value for the attribute named ``key``.

    The SHA-256 digest of ``data`` is reduced modulo ``num_fakes`` and used as
    an index into the pre-generated list ``fakes[key]``, so the same input
    always yields the same pseudonym within one process. Because the index
    space only has ``num_fakes`` slots, different inputs can map to the same
    fake value.

    Args:
        key: Name of the attribute being masked; selects the list in ``fakes``.
        data: The real value to be replaced.

    Returns:
        The fake value chosen for ``data``, or ``key`` itself when no fake
        values exist for that attribute (the real value is never echoed back).
    """
    if key not in fakes:
        # Unknown attribute: nothing to look up, so skip the hashing entirely.
        return key
    digest = hashlib.sha256(data.encode('utf-8')).hexdigest()
    return fakes[key][int(digest, 16) % num_fakes]

def format_user(user: User, pseudonymized: bool):

def format_user(user: User, pseudonymized: bool, user_profile: UserProfile = None):
if pseudonymized:
for _user_profile in UserProfile.objects.all():
if _user_profile.user == user:
_user_profile.student_id = random.randint(10000,90000)
# Return formatted versions of the user's attributes and all the user class's methods
user.student_id = 99999
if user_profile is not None:
user_profile.student_id = str(fake.unique.random_int(min=10, max=10000))
user.first_name = pseudonymize('first_name', user.first_name)
user.last_name = pseudonymize('last_name', user.last_name)
user.email = pseudonymize('email', user.email)
Expand Down

0 comments on commit 93acbc0

Please sign in to comment.