From 0f756fe7db07947c2889d265d0678f76d6f330a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannick=20B=C3=B6rner?= Date: Thu, 1 Apr 2021 16:12:27 +0200 Subject: [PATCH 1/8] Capitalize all methods --- Healex.HL7v2Anonymizer/Program.cs | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/Healex.HL7v2Anonymizer/Program.cs b/Healex.HL7v2Anonymizer/Program.cs index a1756cd..9d8676a 100644 --- a/Healex.HL7v2Anonymizer/Program.cs +++ b/Healex.HL7v2Anonymizer/Program.cs @@ -1,4 +1,5 @@ -using HL7.Dotnetcore; +using Healex.HL7v2Anonymizer.Services; +using HL7.Dotnetcore; using Microsoft.Extensions.Configuration; using System; using System.IO; @@ -9,10 +10,10 @@ class Program { static void Main(string[] args) { - waitForInput(); + WaitForInput(); } - private static void waitForInput() + private static void WaitForInput() { Console.WriteLine("Welcome to the Healex HL7v2 anonymizer!"); Console.WriteLine("---------------------------------------------------------------------------------------"); @@ -21,15 +22,15 @@ private static void waitForInput() Console.WriteLine("Enter the directory to your v2 messages and press enter:"); var directory = Console.ReadLine(); Console.WriteLine(); - tryAnonymizeMessages(directory); + TryAnonymizeMessages(directory); Console.WriteLine(); - waitForInput(); + WaitForInput(); } - private static void tryAnonymizeMessages(string directory) + private static void TryAnonymizeMessages(string directory) { - var pathsToV2Messages = getPathsToV2Messages(directory); - var anonymizer = new Anonymizer(getReplacementOptions()); + var pathsToV2Messages = GetPathsToV2Messages(directory); + var anonymizer = new Anonymizer(GetReplacementOptions()); if (pathsToV2Messages is not null) { @@ -38,14 +39,14 @@ private static void tryAnonymizeMessages(string directory) foreach (string path in pathsToV2Messages) { - var message = readAndParseMessage(path); + var message = ReadAndParseMessage(path); var success = 
anonymizer.Anonymize(message); - serializeAndWriteMessageOrLogError(success, message, path); + SerializeAndWriteMessageOrLogError(success, message, path); } } } - private static void serializeAndWriteMessageOrLogError(bool success, Message message, string path) + private static void SerializeAndWriteMessageOrLogError(bool success, Message message, string path) { if (success) { @@ -59,7 +60,7 @@ private static void serializeAndWriteMessageOrLogError(bool success, Message mes } } - private static string[] getPathsToV2Messages(string directory) + private static string[] GetPathsToV2Messages(string directory) { try { @@ -73,7 +74,7 @@ private static string[] getPathsToV2Messages(string directory) } } - private static Message readAndParseMessage(string path) + private static Message ReadAndParseMessage(string path) { try { @@ -87,7 +88,7 @@ private static Message readAndParseMessage(string path) } } - private static ReplacementOptions getReplacementOptions() + private static ReplacementOptions GetReplacementOptions() { var builder = new ConfigurationBuilder() .SetBasePath(Directory.GetCurrentDirectory()) From dbc7fb7f6ab7123e071afc0c203f3d84a550f8e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannick=20B=C3=B6rner?= Date: Thu, 1 Apr 2021 16:12:43 +0200 Subject: [PATCH 2/8] Add HashGenerator --- .../{ => Services}/Anonymizer.cs | 28 +++++++++++++------ .../Services/HashGenerator.cs | 19 +++++++++++++ Healex.HL7v2Anonymizer/appsettings.json | 6 ++-- 3 files changed, 42 insertions(+), 11 deletions(-) rename Healex.HL7v2Anonymizer/{ => Services}/Anonymizer.cs (62%) create mode 100644 Healex.HL7v2Anonymizer/Services/HashGenerator.cs diff --git a/Healex.HL7v2Anonymizer/Anonymizer.cs b/Healex.HL7v2Anonymizer/Services/Anonymizer.cs similarity index 62% rename from Healex.HL7v2Anonymizer/Anonymizer.cs rename to Healex.HL7v2Anonymizer/Services/Anonymizer.cs index 181702f..5650e40 100644 --- a/Healex.HL7v2Anonymizer/Anonymizer.cs +++ b/Healex.HL7v2Anonymizer/Services/Anonymizer.cs @@ 
-2,11 +2,11 @@ using System; using static Healex.HL7v2Anonymizer.ReplacementOptions; -namespace Healex.HL7v2Anonymizer +namespace Healex.HL7v2Anonymizer.Services { public class Anonymizer { - private ReplacementOptions _replacementOptions; + private readonly ReplacementOptions _replacementOptions; public Anonymizer(ReplacementOptions replacementOptions) { @@ -23,24 +23,38 @@ public bool Anonymize(Message message) { // Create new temporary message for each repeating segment // because we can't set values in all repeating segments at once - // We use references so this overwrites the original segments var tempMessage = new Message(); tempMessage.AddNewSegment(segment); foreach (Replacement replacement in segmentReplacement.Replacements) { - tryReplaceValue(replacement, tempMessage); + var replacementValue = GetReplacementValue(replacement, message); + TryReplaceValue(tempMessage, replacement.Path, replacementValue); } } } return isSuccess; } - private bool tryReplaceValue(Replacement replacement, Message message) + private string GetReplacementValue(Replacement replacement, Message message) + { + if (replacement.Value == "HASH") + { + var valueToHash = message.GetValue(replacement.Path); + var hashedValue = HashGenerator.HashString(valueToHash); + return hashedValue; + } + else + { + return replacement.Value; + } + } + + private bool TryReplaceValue(Message message, string path, string replacementValue) { try { - message.SetValue(replacement.Path, replacement.Value); + message.SetValue(path, replacementValue); } catch (HL7Exception) { @@ -54,6 +68,4 @@ private bool tryReplaceValue(Replacement replacement, Message message) return true; } } - - } diff --git a/Healex.HL7v2Anonymizer/Services/HashGenerator.cs b/Healex.HL7v2Anonymizer/Services/HashGenerator.cs new file mode 100644 index 0000000..4bb9b5f --- /dev/null +++ b/Healex.HL7v2Anonymizer/Services/HashGenerator.cs @@ -0,0 +1,19 @@ +using System; +using System.Security.Cryptography; +using System.Text; + +namespace 
Healex.HL7v2Anonymizer.Services +{ + public class HashGenerator + { + public static string HashString(string value) + { + var hasher = SHA512.Create(); + var hashedValue = hasher.ComputeHash(Encoding.UTF8.GetBytes(value)); + + var hashAsInt = BitConverter.ToInt32(hashedValue, 0); + var positiveHashedValue = Math.Abs(hashAsInt); + return positiveHashedValue.ToString(); + } + } +} diff --git a/Healex.HL7v2Anonymizer/appsettings.json b/Healex.HL7v2Anonymizer/appsettings.json index 48a68e9..d58464d 100644 --- a/Healex.HL7v2Anonymizer/appsettings.json +++ b/Healex.HL7v2Anonymizer/appsettings.json @@ -6,15 +6,15 @@ "Replacements": [ { "Path": "PID.2.1", - "Value": "Id" + "Value": "HASH" }, { "Path": "PID.3.1", - "Value": "Id" + "Value": "HASH" }, { "Path": "PID.4.1", - "Value": "Id" + "Value": "HASH" }, { "Path": "PID.5.1", From 5d09108e1505b76c1646fda8e9ccd5ab13d3d8f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannick=20B=C3=B6rner?= Date: Thu, 1 Apr 2021 16:13:01 +0200 Subject: [PATCH 3/8] Add tests for Hashgenerator. Adjust existing tests. 
--- .../AnonymizerTests.cs | 8 +++++++- .../HashGeneratorTests.cs | 20 +++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 Healex.HL7v2Anonymizer.Tests/HashGeneratorTests.cs diff --git a/Healex.HL7v2Anonymizer.Tests/AnonymizerTests.cs b/Healex.HL7v2Anonymizer.Tests/AnonymizerTests.cs index ee2a48e..c0fab29 100644 --- a/Healex.HL7v2Anonymizer.Tests/AnonymizerTests.cs +++ b/Healex.HL7v2Anonymizer.Tests/AnonymizerTests.cs @@ -1,3 +1,4 @@ +using Healex.HL7v2Anonymizer.Services; using HL7.Dotnetcore; using Microsoft.Extensions.Configuration; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -26,6 +27,7 @@ public void AnonymizerTestOru() public void TestAnonymization(string messageContent) { + var originalMessage = new Message(messageContent); var message = new Message(messageContent); // Setup @@ -43,7 +45,11 @@ public void TestAnonymization(string messageContent) { try { - Assert.AreEqual(message.GetValue(replacement.Path), replacement.Value); + var originalValue = originalMessage.GetValue(replacement.Path); + var newValue = message.GetValue(replacement.Path); + + Assert.AreNotEqual(originalValue, newValue); + Assert.IsTrue(newValue == replacement.Value || newValue == HashGenerator.HashString(originalValue)); } catch (HL7Exception) { diff --git a/Healex.HL7v2Anonymizer.Tests/HashGeneratorTests.cs b/Healex.HL7v2Anonymizer.Tests/HashGeneratorTests.cs new file mode 100644 index 0000000..0a985c6 --- /dev/null +++ b/Healex.HL7v2Anonymizer.Tests/HashGeneratorTests.cs @@ -0,0 +1,20 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Healex.HL7v2Anonymizer.Services; + + +namespace Healex.HL7v2Anonymizer.Tests +{ + [TestClass] + public class HashGeneratorTests + { + [TestMethod] + public void TestGetStringHash() + { + var testPid = "48234029834029834"; + var hashedtestPid = HashGenerator.HashString(testPid); + + Assert.IsNotNull(hashedtestPid); + Assert.AreEqual(hashedtestPid, "750678352"); + } + } +} From 
d13c72f51b32e013b6d720d814a04598109c3825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannick=20B=C3=B6rner?= Date: Thu, 1 Apr 2021 16:15:28 +0200 Subject: [PATCH 4/8] Add hashing for IDs by default. --- Healex.HL7v2Anonymizer/appsettings.json | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Healex.HL7v2Anonymizer/appsettings.json b/Healex.HL7v2Anonymizer/appsettings.json index d58464d..7e7454e 100644 --- a/Healex.HL7v2Anonymizer/appsettings.json +++ b/Healex.HL7v2Anonymizer/appsettings.json @@ -147,7 +147,7 @@ "Replacements": [ { "Path": "NK1.1.1", - "Value": "Id" + "Value":"HASH" }, { "Path": "NK1.2.1", @@ -332,7 +332,7 @@ }, { "Path": "IN1.7", - "Value": "Id" + "Value":"HASH" }, { "Path": "IN1.11.1", @@ -417,7 +417,7 @@ "Replacements": [ { "Path": "IN2.1.1", - "Value": "Id" + "Value":"HASH" }, { "Path": "IN2.2", @@ -469,11 +469,11 @@ }, { "Path": "IN2.25.1", - "Value": "ID" + "Value":"HASH" }, { "Path": "IN2.26.1", - "Value": "ID" + "Value":"HASH" }, { "Path": "IN2.40.1", From 3b32f53bbd9dea008cec924e164effd40ac93429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannick=20B=C3=B6rner?= Date: Thu, 1 Apr 2021 16:46:48 +0200 Subject: [PATCH 5/8] Update Readme --- README.md | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c58297c..ce4ac4e 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,11 @@ # Healex.HL7v2Anonymizer -This console application allows you to anonymize HL7v2 messages. +This console application allows you to anonymize HL7v2 messages. The standard configuration anonymizes all identifiable data in HL7 v2 messages and hashes fields that include an ID. ## Motivation -The project was built to enable data stewards and scientists to share HL7v2 sample messages without identifiable data. +The project was built to enable anyone to share HL7v2 sample messages without identifiable data. ## How to use? 
@@ -16,6 +16,8 @@ The project was built to enable data stewards and scientists to share HL7v2 samp 2. Unzip it. 3. Run the application and enter the path to your v2 messages. Make sure to back them up prior to runing the application since the original messages will be overwritten. +## Configuration + This application will use the `appsettings.json` to read the values that are to be replaced for each segments and their corresponding subsegments. A segment is recognized by its `"Segment"` property. Each segment contains an array of replacements. A segment's subsegment can be identified by its `"Path"` property inside the replacements array. Subsegments will also have a value property that contains the value by which a value inside a HL7v2 message is to be replaced. @@ -36,6 +38,22 @@ Say for instance, you want to replace the value that is currently assigned for t } ``` +Use the "HASH" keyword to generate persistent, pseudonymized IDs. This function will always generate the same anonymized ID for a given ID in the HL7 v2 message. The hash function is one-way, so there is no way of reversing the pseudonymized ID back to its original ID. + +```json + { + "Segment": "PID", + "Replacements": [ + // omitted + { + "Path": "PID.1.1", + "Value": "HASH" <---- The value in PID.1.1 will be hashed, not overwritten + }, + // omitted + ] + } +``` + Adding additional segments works in a similar manner. Simply add a new segment to the `appsettings.json` after `"Segment": "IN2"`. Make sure to add a comma to the closing brace of this segment so the JSON file remains valid, then use this template to add a new segment. ```json From 00c091af2e1520eee341d9b583746523b2c8f488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannick=20B=C3=B6rner?= Date: Tue, 6 Apr 2021 08:44:50 +0200 Subject: [PATCH 6/8] Add error handling for missing values. 
--- Healex.HL7v2Anonymizer/Services/Anonymizer.cs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/Healex.HL7v2Anonymizer/Services/Anonymizer.cs b/Healex.HL7v2Anonymizer/Services/Anonymizer.cs index 5650e40..82ae376 100644 --- a/Healex.HL7v2Anonymizer/Services/Anonymizer.cs +++ b/Healex.HL7v2Anonymizer/Services/Anonymizer.cs @@ -40,14 +40,17 @@ private string GetReplacementValue(Replacement replacement, Message message) { if (replacement.Value == "HASH") { - var valueToHash = message.GetValue(replacement.Path); - var hashedValue = HashGenerator.HashString(valueToHash); - return hashedValue; - } - else - { - return replacement.Value; + try + { + var valueToHash = message.GetValue(replacement.Path); + var hashedValue = HashGenerator.HashString(valueToHash); + return hashedValue; + } catch + { + // Could not find a value to hash in the HL7 message + } } + return replacement.Value; } private bool TryReplaceValue(Message message, string path, string replacementValue) From 11056fcdc4b981e7298a2a0dfb89b1aafb42fab4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yannick=20B=C3=B6rner?= Date: Tue, 6 Apr 2021 08:52:11 +0200 Subject: [PATCH 7/8] Improve error handling for faulty v2 messages --- Healex.HL7v2Anonymizer/Program.cs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/Healex.HL7v2Anonymizer/Program.cs b/Healex.HL7v2Anonymizer/Program.cs index 9d8676a..ee3110d 100644 --- a/Healex.HL7v2Anonymizer/Program.cs +++ b/Healex.HL7v2Anonymizer/Program.cs @@ -40,8 +40,11 @@ private static void TryAnonymizeMessages(string directory) foreach (string path in pathsToV2Messages) { var message = ReadAndParseMessage(path); - var success = anonymizer.Anonymize(message); - SerializeAndWriteMessageOrLogError(success, message, path); + if (message is not null) + { + var success = anonymizer.Anonymize(message); + SerializeAndWriteMessageOrLogError(success, message, path); + } } } } From 7460aa4c447e333c3b18961edac1c6a33389d223 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Yannick=20B=C3=B6rner?= Date: Tue, 6 Apr 2021 09:51:45 +0200 Subject: [PATCH 8/8] Set isSuccess --- Healex.HL7v2Anonymizer/Services/Anonymizer.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Healex.HL7v2Anonymizer/Services/Anonymizer.cs b/Healex.HL7v2Anonymizer/Services/Anonymizer.cs index 82ae376..beb7ee8 100644 --- a/Healex.HL7v2Anonymizer/Services/Anonymizer.cs +++ b/Healex.HL7v2Anonymizer/Services/Anonymizer.cs @@ -29,7 +29,7 @@ public bool Anonymize(Message message) foreach (Replacement replacement in segmentReplacement.Replacements) { var replacementValue = GetReplacementValue(replacement, message); - TryReplaceValue(tempMessage, replacement.Path, replacementValue); + isSuccess = TryReplaceValue(tempMessage, replacement.Path, replacementValue); } } }