From e961f6bf1c82ecaeb6592c0b8107be6c596f9278 Mon Sep 17 00:00:00 2001 From: Jonas Sander <29028262+Jonas-Sander@users.noreply.github.com> Date: Sun, 5 Nov 2023 22:58:46 +0100 Subject: [PATCH] Update privacy policy anchor hash algorithm. (#1146) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We now replace umlaute and diacritics. | Input | Old | New | |--------|--------|--------| | 1. Einführung | 1-einfhrung | 1-einfuehrung | | abc pokémon | abc-pokmon | abc-pokemon | Fixes #1145 --- .../src/ui/sharezone_markdown_extensions.dart | 26 +++++- app/pubspec.lock | 8 ++ app/pubspec.yaml | 1 + .../generate_anchor_hash_test.dart | 89 ++++++++++++++----- 4 files changed, 98 insertions(+), 26 deletions(-) diff --git a/app/lib/privacy_policy/src/ui/sharezone_markdown_extensions.dart b/app/lib/privacy_policy/src/ui/sharezone_markdown_extensions.dart index 57e19add2..e8c9654dc 100644 --- a/app/lib/privacy_policy/src/ui/sharezone_markdown_extensions.dart +++ b/app/lib/privacy_policy/src/ui/sharezone_markdown_extensions.dart @@ -7,12 +7,11 @@ // SPDX-License-Identifier: EUPL-1.2 import 'package:markdown/markdown.dart'; +import 'package:diacritic/diacritic.dart' as pkg; /// We define our own classes for automatic ID generation as we don't want any /// chance of breakage if `package:markdown` changes/updates its algorithm. /// -/// It's the exact same algorithm (for now). -/// /// It might break if we e.g. would use one updated algorithm in our backend to /// generate the anchor ids for the table of contents section list, but our /// client uses an old algorithm to generate/search for the anchor ids inside @@ -86,5 +85,28 @@ String _generateAnchorHashFromElement(Element element) => String generateAnchorHash(String text) => text .toLowerCase() .trim() + .replaceAllMapped(regexp, (char) => _replacementMap[char.group(0)]!) + .removeDiacritics() .replaceAll(RegExp(r'[^a-z0-9 _-]'), '') .replaceAll(RegExp(r'\s'), '-'); + +// Match every character in [_replacementMap.keys]. +RegExp get regexp => RegExp(_replacementMap.keys.join('|')); + +const _replacementMap = { + 'Ä': 'ae', + 'ä': 'ae', + 'Ö': 'oe', + 'ö': 'oe', + 'Ü': 'ue', + 'ü': 'ue', + 'ẞ': 'ss', + 'ß': 'ss', +}; + +extension on String { + /// E.g. `é` -> `e` + String removeDiacritics() { + return pkg.removeDiacritics(this); + } +} diff --git a/app/pubspec.lock b/app/pubspec.lock index 9d9cc44b7..fd73b7ec7 100644 --- a/app/pubspec.lock +++ b/app/pubspec.lock @@ -484,6 +484,14 @@ packages: url: "https://pub.dev" source: hosted version: "7.0.0" + diacritic: + dependency: "direct main" + description: + name: diacritic + sha256: a84e03ec2779375fb86430dbe9d8fba62c68376f2499097a5f6e75556babe706 + url: "https://pub.dev" + source: hosted + version: "0.1.4" diff_match_patch: dependency: transitive description: diff --git a/app/pubspec.yaml b/app/pubspec.yaml index b334de311..e14e80274 100644 --- a/app/pubspec.yaml +++ b/app/pubspec.yaml @@ -58,6 +58,7 @@ dependencies: path: ../lib/date design: path: ../lib/design + diacritic: ^0.1.4 dio: ^5.3.2 dynamic_links: path: ../lib/dynamic_links diff --git a/app/test/privacy_policy/generate_anchor_hash_test.dart b/app/test/privacy_policy/generate_anchor_hash_test.dart index e2358740d..fd692c643 100644 --- a/app/test/privacy_policy/generate_anchor_hash_test.dart +++ b/app/test/privacy_policy/generate_anchor_hash_test.dart @@ -12,29 +12,70 @@ import 'package:sharezone/privacy_policy/src/privacy_policy_src.dart'; void main() { /// We are testing this since changing the anchor hash generation algorithm /// might break stuff. See comments in [generateAnchorHash] file. - test('Generates correct anchor hashes', () { - final sections = [ - '1. Einführung', - '2. Kontaktinformationen', - '3. Wichtige Begriffe, die du kennen solltest', - '4. Welche Informationen erfassen wir grundsätzlich?', - '5. An wen geben wir deine Daten weiter?', - '6. Wie lange speichern wir deine Daten?', - '7. Welche Rechte hast du?', - 'Glückwunsch, du hast es geschafft' - ]; - - final anchorHashes = sections.map(generateAnchorHash).toList(); - - expect(anchorHashes, [ - '1-einfhrung', - '2-kontaktinformationen', - '3-wichtige-begriffe-die-du-kennen-solltest', - '4-welche-informationen-erfassen-wir-grundstzlich', - '5-an-wen-geben-wir-deine-daten-weiter', - '6-wie-lange-speichern-wir-deine-daten', - '7-welche-rechte-hast-du', - 'glckwunsch-du-hast-es-geschafft', - ]); + group('Generates correct anchor hashes', () { + test('real life examples', () { + final sections = [ + '1. Einführung', + '2. Kontaktinformationen', + '3. Wichtige Begriffe, die du kennen solltest', + '4. Welche Informationen erfassen wir grundsätzlich?', + '5. An wen geben wir deine Daten weiter?', + '6. Wie lange speichern wir deine Daten?', + '7. Welche Rechte hast du?', + 'Glückwunsch, du hast es geschafft', + ]; + + final anchorHashes = sections.map(generateAnchorHash).toList(); + + expect(anchorHashes, [ + '1-einfuehrung', + '2-kontaktinformationen', + '3-wichtige-begriffe-die-du-kennen-solltest', + '4-welche-informationen-erfassen-wir-grundsaetzlich', + '5-an-wen-geben-wir-deine-daten-weiter', + '6-wie-lange-speichern-wir-deine-daten', + '7-welche-rechte-hast-du', + 'glueckwunsch-du-hast-es-geschafft', + ]); + }); + test('replaces special chars', () { + final sections = [ + r'1-?!=""§$pokémon~+', + 'Ist das eine Frage???', + 'Wort-mit-Bindestrichen' + ]; + + final anchorHashes = sections.map(generateAnchorHash).toList(); + + expect(anchorHashes, [ + '1-pokemon', + 'ist-das-eine-frage', + 'wort-mit-bindestrichen', + ]); + }); + + test('replaces umlaute', () { + final umlaute = [ + 'ä', + 'Ä', + 'ö', + 'Ö', + 'ü', + 'Ü', + 'ß', + ]; + + final anchorHashes = umlaute.map(generateAnchorHash).toList(); + + expect(anchorHashes, [ + 'ae', + 'ae', + 'oe', + 'oe', + 'ue', + 'ue', + 'ss', + ]); + }); }); }