From 2814b1b343edff40de12a0ba1849f4602b0946ad Mon Sep 17 00:00:00 2001 From: Sparow199 Date: Mon, 2 Mar 2020 12:14:05 +0100 Subject: [PATCH] feat(*): Add cities list to make label --- addok_france/utils.py | 53 ++++++++++++++++++-------------- tests/test_utils.py | 70 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 95 insertions(+), 28 deletions(-) diff --git a/addok_france/utils.py b/addok_france/utils.py index 779ab89..3778ada 100644 --- a/addok_france/utils.py +++ b/addok_france/utils.py @@ -37,13 +37,11 @@ # Match "rue", "boulevard", "bd", etc. TYPES_PATTERN = re.compile(r'\b(' + TYPES_REGEX + r')\b', flags=re.IGNORECASE) - # Match number + ordinal, once glued by glue_ordinal (or typed like this in the # search string, for example "6bis", "234ter"). FOLD_PATTERN = re.compile(r'^(\d{1,4})(' + ORDINAL_REGEX + ')$', flags=re.IGNORECASE) - # Match number once cleaned by glue_ordinal and fold_ordinal (for example # "6b", "234t"…) NUMBER_PATTERN = re.compile(r'\b\d{1,4}[a-z]?\b', flags=re.IGNORECASE) @@ -139,28 +137,37 @@ def make_labels(helper, result): housenumber = getattr(result, 'housenumber', None) def add(labels, label): - labels.insert(0, label) + labels.add(label) if housenumber: label = '{} {}'.format(housenumber, label) - labels.insert(0, label) + labels.add(label) + + raw_cities = result._rawattr("city") + + if isinstance(raw_cities, list): + cities = raw_cities + else: + cities = [raw_cities] - city = result.city postcode = result.postcode - names = result._rawattr('name') - if not isinstance(names, (list, tuple)): - names = [names] - for name in names: - labels = [] - label = name - if postcode and result.type == 'municipality': - add(labels, '{} {}'.format(label, postcode)) - add(labels, '{} {}'.format(postcode, label)) - add(labels, label) - if city and city != label: - add(labels, '{} {}'.format(label, city)) - if postcode: - label = '{} {}'.format(label, postcode) - add(labels, label) - label = '{} {}'.format(label, city) - add(labels, label) - result.labels.extend(labels) + names = getattr(result, 'name', None) + labels = set() + + for city in cities: + if not isinstance(names, (list, tuple)): + names = [names] + for name in names: + label = name + if postcode and result.type == 'municipality': + add(labels, '{} {}'.format(label, postcode)) + add(labels, '{} {}'.format(postcode, label)) + add(labels, label) + if city and city != label: + add(labels, '{} {}'.format(label, city)) + if postcode: + label = '{} {}'.format(label, postcode) + add(labels, label) + label = '{} {}'.format(label, city) + add(labels, label) + + result.labels.extend(sorted(list(labels), key=lambda item: (len(item), item), reverse=True)) diff --git a/tests/test_utils.py b/tests/test_utils.py index 969ee91..7132f2e 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,11 +1,11 @@ import json import pytest - from addok.batch import process_documents from addok.core import search, Result from addok.ds import get_document from addok.helpers.text import Token + from addok_france.utils import (clean_query, extract_address, flag_housenumber, fold_ordinal, glue_ordinal, make_labels, remove_leading_zeros) @@ -303,15 +303,75 @@ def test_make_labels(config): assert result.labels == [ '1 bis rue des Lilas 75010 Paris', 'rue des Lilas 75010 Paris', - '1 bis rue des Lilas 75010', - 'rue des Lilas 75010', '1 bis rue des Lilas Paris', + '1 bis rue des Lilas 75010', 'rue des Lilas Paris', + 'rue des Lilas 75010', '1 bis rue des Lilas', 'rue des Lilas' ] +def test_make_labels_merged_cities(config): + doc = { + "_id": "53543a313139353538390000", + "id": "53543a313139353538390000", + "type": "street", + "postcode": "49120", + "hexacleStreet": "492812226P", + "lat": "47.1469", + "lon": "-0.75745", + "context": "49, Maine-et-Loire, Pays de la Loire", + "importance": 1, + "userLabel": "RUE PIERRE LEPOUREAU", + "name": "RUE PIERRE LEPOUREAU", + "housenumbers": { + "2 BIS": { + "hexacleNumber": "49281222UE", + "lat": "47.1504", + "lon": "-0.757414" + } + }, + "cityAfnorLabel": "CHEMILLE EN ANJOU", + "userCityLabel": "ST GEORGES DES GARDES (CHEMILLE EN ANJOU)", + "cityAliasAfnorLabel": "ST GEORGES DES GARDES", + "city": [ + "ST GEORGES DES GARDES (CHEMILLE EN ANJOU)", + "ST GEORGES DES GARDES", + "CHEMILLE EN ANJOU", + "SAINT GEORGES DES GARDES" + ] + } + + process_documents(json.dumps(doc)) + result = Result(get_document('d|53543a313139353538390000')) + result.housenumber = '2 bis' # Simulate match_housenumber + make_labels(None, result) + + assert result.labels == [ + '2 bis RUE PIERRE LEPOUREAU 49120 ST GEORGES DES GARDES (CHEMILLE EN ANJOU)', + 'RUE PIERRE LEPOUREAU 49120 ST GEORGES DES GARDES (CHEMILLE EN ANJOU)', + '2 bis RUE PIERRE LEPOUREAU ST GEORGES DES GARDES (CHEMILLE EN ANJOU)', + 'RUE PIERRE LEPOUREAU ST GEORGES DES GARDES (CHEMILLE EN ANJOU)', + '2 bis RUE PIERRE LEPOUREAU 49120 SAINT GEORGES DES GARDES', + '2 bis RUE PIERRE LEPOUREAU 49120 ST GEORGES DES GARDES', + 'RUE PIERRE LEPOUREAU 49120 SAINT GEORGES DES GARDES', + '2 bis RUE PIERRE LEPOUREAU SAINT GEORGES DES GARDES', + '2 bis RUE PIERRE LEPOUREAU 49120 CHEMILLE EN ANJOU', + 'RUE PIERRE LEPOUREAU 49120 ST GEORGES DES GARDES', + '2 bis RUE PIERRE LEPOUREAU ST GEORGES DES GARDES', + 'RUE PIERRE LEPOUREAU SAINT GEORGES DES GARDES', + 'RUE PIERRE LEPOUREAU 49120 CHEMILLE EN ANJOU', + '2 bis RUE PIERRE LEPOUREAU CHEMILLE EN ANJOU', + 'RUE PIERRE LEPOUREAU ST GEORGES DES GARDES', + 'RUE PIERRE LEPOUREAU CHEMILLE EN ANJOU', + '2 bis RUE PIERRE LEPOUREAU 49120', + 'RUE PIERRE LEPOUREAU 49120', + '2 bis RUE PIERRE LEPOUREAU', + 'RUE PIERRE LEPOUREAU' + ] + + def test_make_municipality_labels(config): doc = { 'id': 'xxxx', @@ -327,7 +387,7 @@ def test_make_municipality_labels(config): result = Result(get_document('d|yyyy')) make_labels(None, result) assert result.labels == [ - 'Lille', - '59000 Lille', 'Lille 59000', + '59000 Lille', + 'Lille' ]