-
Notifications
You must be signed in to change notification settings - Fork 0
/
PiiMasker.py
32 lines (24 loc) · 1.05 KB
/
PiiMasker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import hashlib
class PiiMasker:
    """
    Mask personally identifiable information (PII) in record dictionaries.

    Fetch wants to hide PII. The fields `device_id` and `ip` should be masked,
    but in a way where it is easy for data analysts to identify duplicate
    values in those fields. Each value is therefore replaced by its SHA-256
    hex digest: equal inputs always produce equal digests.
    """

    # Default map of existing field name -> new (masked) field name.
    # Kept private and copied per instance so instances never share state.
    _DEFAULT_MASK_FIELDS = {"device_id": "masked_device_id", "ip": "masked_ip"}

    def __init__(self, mask_fields=None):
        """
        Create a masker.

        Args:
            mask_fields: Mapping of existing field names to the field names
                their masked values are stored under. When omitted, a fresh
                copy of the default mapping (`device_id` -> `masked_device_id`,
                `ip` -> `masked_ip`) is used. A caller-supplied mapping is
                stored as-is (not copied).
        """
        if mask_fields is None:
            # A dict literal in the signature would be created once and shared
            # by every default-constructed instance; build a new one instead.
            mask_fields = dict(self._DEFAULT_MASK_FIELDS)
        # map of existing fields to mask to new field names
        self.mask_fields = mask_fields

    def mask_all(self, data):
        """
        Mask all fields in the data dictionary that are in the mask_fields map,
        replacing the value with the hashed value and renaming the field to
        the new field name.

        The dictionary is modified in place and also returned. Fields listed
        in the map but absent from `data` are skipped.

        Args:
            data: Mutable mapping (record) to mask.

        Returns:
            The same `data` object, after masking.
        """
        for field, new_field in self.mask_fields.items():
            if field in data:
                masked = self.mask_value(data[field])
                # Delete before assigning so that mapping a field onto its own
                # name (mask in place) keeps the masked value instead of
                # deleting it right after it was written.
                del data[field]
                data[new_field] = masked
        return data

    def mask_value(self, value):
        """
        Return the SHA-256 hex digest of a single value.

        Strings are UTF-8 encoded before hashing. Bytes-like values are hashed
        directly. Any other non-None value is hashed via its `str()` form, so
        `123` and `"123"` yield the same digest. `None` is returned unchanged
        so that a missing value stays recognisable as missing.

        Args:
            value: The value to mask.

        Returns:
            A 64-character lowercase hex string, or None if `value` is None.
        """
        if value is None:
            return None
        if isinstance(value, (bytes, bytearray)):
            raw = bytes(value)
        else:
            raw = str(value).encode()
        return hashlib.sha256(raw).hexdigest()