diff --git a/Healex.HL7v2Anonymizer.Tests/AnonymizerTests.cs b/Healex.HL7v2Anonymizer.Tests/AnonymizerTests.cs index ee2a48e..c0fab29 100644 --- a/Healex.HL7v2Anonymizer.Tests/AnonymizerTests.cs +++ b/Healex.HL7v2Anonymizer.Tests/AnonymizerTests.cs @@ -1,3 +1,4 @@ +using Healex.HL7v2Anonymizer.Services; using HL7.Dotnetcore; using Microsoft.Extensions.Configuration; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -26,6 +27,7 @@ public void AnonymizerTestOru() public void TestAnonymization(string messageContent) { + var originalMessage = new Message(messageContent); var message = new Message(messageContent); // Setup @@ -43,7 +45,11 @@ public void TestAnonymization(string messageContent) { try { - Assert.AreEqual(message.GetValue(replacement.Path), replacement.Value); + var originalValue = originalMessage.GetValue(replacement.Path); + var newValue = message.GetValue(replacement.Path); + + Assert.AreNotEqual(originalValue, newValue); + Assert.IsTrue(newValue == replacement.Value || newValue == HashGenerator.HashString(originalValue)); } catch (HL7Exception) { diff --git a/Healex.HL7v2Anonymizer.Tests/HashGeneratorTests.cs b/Healex.HL7v2Anonymizer.Tests/HashGeneratorTests.cs new file mode 100644 index 0000000..0a985c6 --- /dev/null +++ b/Healex.HL7v2Anonymizer.Tests/HashGeneratorTests.cs @@ -0,0 +1,20 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Healex.HL7v2Anonymizer.Services; + + +namespace Healex.HL7v2Anonymizer.Tests +{ + [TestClass] + public class HashGeneratorTests + { + [TestMethod] + public void TestGetStringHash() + { + var testPid = "48234029834029834"; + var hashedtestPid = HashGenerator.HashString(testPid); + + Assert.IsNotNull(hashedtestPid); + Assert.AreEqual(hashedtestPid, "750678352"); + } + } +} diff --git a/Healex.HL7v2Anonymizer/Program.cs b/Healex.HL7v2Anonymizer/Program.cs index a1756cd..ee3110d 100644 --- a/Healex.HL7v2Anonymizer/Program.cs +++ b/Healex.HL7v2Anonymizer/Program.cs @@ -1,4 +1,5 @@ -using HL7.Dotnetcore; 
+using Healex.HL7v2Anonymizer.Services; +using HL7.Dotnetcore; using Microsoft.Extensions.Configuration; using System; using System.IO; @@ -9,10 +10,10 @@ class Program { static void Main(string[] args) { - waitForInput(); + WaitForInput(); } - private static void waitForInput() + private static void WaitForInput() { Console.WriteLine("Welcome to the Healex HL7v2 anonymizer!"); Console.WriteLine("---------------------------------------------------------------------------------------"); @@ -21,15 +22,15 @@ private static void waitForInput() Console.WriteLine("Enter the directory to your v2 messages and press enter:"); var directory = Console.ReadLine(); Console.WriteLine(); - tryAnonymizeMessages(directory); + TryAnonymizeMessages(directory); Console.WriteLine(); - waitForInput(); + WaitForInput(); } - private static void tryAnonymizeMessages(string directory) + private static void TryAnonymizeMessages(string directory) { - var pathsToV2Messages = getPathsToV2Messages(directory); - var anonymizer = new Anonymizer(getReplacementOptions()); + var pathsToV2Messages = GetPathsToV2Messages(directory); + var anonymizer = new Anonymizer(GetReplacementOptions()); if (pathsToV2Messages is not null) { @@ -38,14 +39,17 @@ private static void tryAnonymizeMessages(string directory) foreach (string path in pathsToV2Messages) { - var message = readAndParseMessage(path); - var success = anonymizer.Anonymize(message); - serializeAndWriteMessageOrLogError(success, message, path); + var message = ReadAndParseMessage(path); + if (message is not null) + { + var success = anonymizer.Anonymize(message); + SerializeAndWriteMessageOrLogError(success, message, path); + } } } } - private static void serializeAndWriteMessageOrLogError(bool success, Message message, string path) + private static void SerializeAndWriteMessageOrLogError(bool success, Message message, string path) { if (success) { @@ -59,7 +63,7 @@ private static void serializeAndWriteMessageOrLogError(bool success, Message mes } 
} - private static string[] getPathsToV2Messages(string directory) + private static string[] GetPathsToV2Messages(string directory) { try { @@ -73,7 +77,7 @@ private static string[] getPathsToV2Messages(string directory) } } - private static Message readAndParseMessage(string path) + private static Message ReadAndParseMessage(string path) { try { @@ -87,7 +91,7 @@ private static Message readAndParseMessage(string path) } } - private static ReplacementOptions getReplacementOptions() + private static ReplacementOptions GetReplacementOptions() { var builder = new ConfigurationBuilder() .SetBasePath(Directory.GetCurrentDirectory()) diff --git a/Healex.HL7v2Anonymizer/Anonymizer.cs b/Healex.HL7v2Anonymizer/Services/Anonymizer.cs similarity index 58% rename from Healex.HL7v2Anonymizer/Anonymizer.cs rename to Healex.HL7v2Anonymizer/Services/Anonymizer.cs index 181702f..beb7ee8 100644 --- a/Healex.HL7v2Anonymizer/Anonymizer.cs +++ b/Healex.HL7v2Anonymizer/Services/Anonymizer.cs @@ -2,11 +2,11 @@ using System; using static Healex.HL7v2Anonymizer.ReplacementOptions; -namespace Healex.HL7v2Anonymizer +namespace Healex.HL7v2Anonymizer.Services { public class Anonymizer { - private ReplacementOptions _replacementOptions; + private readonly ReplacementOptions _replacementOptions; public Anonymizer(ReplacementOptions replacementOptions) { @@ -23,24 +23,41 @@ public bool Anonymize(Message message) { // Create new temporary message for each repeating segment // because we can't set values in all repeating segments at once - // We use references so this overwrites the original segments var tempMessage = new Message(); tempMessage.AddNewSegment(segment); foreach (Replacement replacement in segmentReplacement.Replacements) { - tryReplaceValue(replacement, tempMessage); + var replacementValue = GetReplacementValue(replacement, message); + isSuccess = TryReplaceValue(tempMessage, replacement.Path, replacementValue); } } } return isSuccess; } - private bool tryReplaceValue(Replacement 
replacement, Message message) + private string GetReplacementValue(Replacement replacement, Message message) + { + if (replacement.Value == "HASH") + { + try + { + var valueToHash = message.GetValue(replacement.Path); + var hashedValue = HashGenerator.HashString(valueToHash); + return hashedValue; + } catch + { + // Could not find a value to hash in the HL7 message + } + } + return replacement.Value; + } + + private bool TryReplaceValue(Message message, string path, string replacementValue) { try { - message.SetValue(replacement.Path, replacement.Value); + message.SetValue(path, replacementValue); } catch (HL7Exception) { @@ -54,6 +71,4 @@ private bool tryReplaceValue(Replacement replacement, Message message) return true; } } - - } diff --git a/Healex.HL7v2Anonymizer/Services/HashGenerator.cs b/Healex.HL7v2Anonymizer/Services/HashGenerator.cs new file mode 100644 index 0000000..4bb9b5f --- /dev/null +++ b/Healex.HL7v2Anonymizer/Services/HashGenerator.cs @@ -0,0 +1,19 @@ +using System; +using System.Security.Cryptography; +using System.Text; + +namespace Healex.HL7v2Anonymizer.Services +{ + public class HashGenerator + { + public static string HashString(string value) + { + var hasher = SHA512.Create(); + var hashedValue = hasher.ComputeHash(Encoding.UTF8.GetBytes(value)); + + var hashAsInt = BitConverter.ToInt32(hashedValue, 0); + var positiveHashedValue = Math.Abs(hashAsInt); + return positiveHashedValue.ToString(); + } + } +} diff --git a/Healex.HL7v2Anonymizer/appsettings.json b/Healex.HL7v2Anonymizer/appsettings.json index 48a68e9..7e7454e 100644 --- a/Healex.HL7v2Anonymizer/appsettings.json +++ b/Healex.HL7v2Anonymizer/appsettings.json @@ -6,15 +6,15 @@ "Replacements": [ { "Path": "PID.2.1", - "Value": "Id" + "Value": "HASH" }, { "Path": "PID.3.1", - "Value": "Id" + "Value": "HASH" }, { "Path": "PID.4.1", - "Value": "Id" + "Value": "HASH" }, { "Path": "PID.5.1", @@ -147,7 +147,7 @@ "Replacements": [ { "Path": "NK1.1.1", - "Value": "Id" + "Value":"HASH" }, { 
"Path": "NK1.2.1", @@ -332,7 +332,7 @@ }, { "Path": "IN1.7", - "Value": "Id" + "Value":"HASH" }, { "Path": "IN1.11.1", @@ -417,7 +417,7 @@ "Replacements": [ { "Path": "IN2.1.1", - "Value": "Id" + "Value":"HASH" }, { "Path": "IN2.2", @@ -469,11 +469,11 @@ }, { "Path": "IN2.25.1", - "Value": "ID" + "Value":"HASH" }, { "Path": "IN2.26.1", - "Value": "ID" + "Value":"HASH" }, { "Path": "IN2.40.1", diff --git a/README.md b/README.md index c58297c..ce4ac4e 100644 --- a/README.md +++ b/README.md @@ -2,11 +2,11 @@ # Healex.HL7v2Anonymizer -This console application allows you to anonymize HL7v2 messages. +This console application allows you to anonymize HL7v2 messages. The standard configuration anonymizes all identifiable data in HL7 v2 messages and hashes fields that include an ID. ## Motivation -The project was built to enable data stewards and scientists to share HL7v2 sample messages without identifiable data. +The project was built to enable anyone to share HL7v2 sample messages without identifiable data. ## How to use? @@ -16,6 +16,8 @@ The project was built to enable data stewards and scientists to share HL7v2 samp 2. Unzip it. 3. Run the application and enter the path to your v2 messages. Make sure to back them up prior to runing the application since the original messages will be overwritten. +## Configuration + This application will use the `appsettings.json` to read the values that are to be replaced for each segments and their corresponding subsegments. A segment is recognized by its `"Segment"` property. Each segment contains an array of replacements. A segment's subsegment can be identified by its `"Path"` property inside the replacements array. Subsegments will also have a value property that contains the value by which a value inside a HL7v2 message is to be replaced. @@ -36,6 +38,22 @@ Say for instance, you want to replace the value that is currently assigned for t } ``` +Use the "HASH" keyword to generate persistent, pseudonymized IDs. 
This function will always generate the same anonymized ID for a given ID in the HL7 v2 message. The hash function is one-way, so the pseudonymized ID cannot be directly converted back to its original ID; note, however, that the hash is unsalted and shortened to a 32-bit number, so anyone able to enumerate candidate original IDs can re-hash and match them, and two different IDs may occasionally map to the same pseudonymized ID. + +```json + { + "Segment": "PID", + "Replacements": [ + // omitted + { + "Path": "PID.1.1", + "Value": "HASH" <---- The value in PID.1.1 will be hashed, not overwritten + }, + // omitted + ] + } +``` + Adding additional segments works in a similar manner. Simply add a new segment to the `appsettings.json` after `"Segment": "IN2"`. Make sure to add a comma to the closing brace of this segment so the JSON file remains valid, then use this template to add a new segment. ```json