From 3044c82637d7f881af0947f947223023ad0ca995 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Doumouro?= Date: Fri, 5 May 2017 16:49:29 +0200 Subject: [PATCH 1/2] Properly handle builtin entities --- snips_nlu/built_in_entities.py | 2 +- snips_nlu/dataset.py | 4 +-- .../intent_parser/regex_intent_parser.py | 4 ++- snips_nlu/nlu_engine.py | 5 ++- snips_nlu/slot_filler/data_augmentation.py | 13 +++++--- snips_nlu/tests/test_nlu_engine.py | 32 +++++++++++++++++++ 6 files changed, 51 insertions(+), 9 deletions(-) diff --git a/snips_nlu/built_in_entities.py b/snips_nlu/built_in_entities.py index c39e10968..83dfeb08e 100644 --- a/snips_nlu/built_in_entities.py +++ b/snips_nlu/built_in_entities.py @@ -106,5 +106,5 @@ def clear_cache(): _DUCKLING_CACHE.clear() -def is_built_in_entity(entity_label): +def is_builtin_entity(entity_label): return entity_label in BuiltInEntity.built_in_entity_by_label diff --git a/snips_nlu/dataset.py b/snips_nlu/dataset.py index 6806ba83b..9895e9e7c 100644 --- a/snips_nlu/dataset.py +++ b/snips_nlu/dataset.py @@ -1,7 +1,7 @@ import re from copy import deepcopy -from snips_nlu.built_in_entities import BuiltInEntity, is_built_in_entity +from snips_nlu.built_in_entities import BuiltInEntity, is_builtin_entity from snips_nlu.constants import (TEXT, USE_SYNONYMS, SYNONYMS, DATA, INTENTS, ENTITIES, ENTITY, SLOT_NAME, UTTERANCES, LANGUAGE, VALUE, AUTOMATICALLY_EXTENSIBLE, @@ -24,7 +24,7 @@ def validate_and_format_dataset(dataset): entities = set() for entity_name, entity in dataset[ENTITIES].iteritems(): entities.add(entity_name) - if is_built_in_entity(entity_name): + if is_builtin_entity(entity_name): validate_entity = validate_and_format_builtin_entity else: validate_entity = validate_and_format_custom_entity diff --git a/snips_nlu/intent_parser/regex_intent_parser.py b/snips_nlu/intent_parser/regex_intent_parser.py index 14882cc4d..f119b442e 100644 --- a/snips_nlu/intent_parser/regex_intent_parser.py +++ b/snips_nlu/intent_parser/regex_intent_parser.py @@ -1,6 +1,6 @@ import re -from snips_nlu.built_in_entities import BuiltInEntity +from snips_nlu.built_in_entities import BuiltInEntity, is_builtin_entity from snips_nlu.constants import (TEXT, USE_SYNONYMS, SYNONYMS, DATA, INTENTS, ENTITIES, SLOT_NAME, UTTERANCES, VALUE, ENTITY, CUSTOM_ENGINE) @@ -80,6 +80,8 @@ def generate_regexes(intent_queries, joined_entity_utterances, def get_joined_entity_utterances(dataset): joined_entity_utterances = dict() for entity_name, entity in dataset[ENTITIES].iteritems(): + if is_builtin_entity(entity_name): + continue if entity[USE_SYNONYMS]: utterances = [syn for entry in entity[DATA] for syn in entry[SYNONYMS]] diff --git a/snips_nlu/nlu_engine.py b/snips_nlu/nlu_engine.py index fa7b8fa22..445c66eda 100644 --- a/snips_nlu/nlu_engine.py +++ b/snips_nlu/nlu_engine.py @@ -5,7 +5,8 @@ from itertools import groupby, permutations from dataset import validate_and_format_dataset, filter_dataset -from snips_nlu.built_in_entities import BuiltInEntity, get_built_in_entities +from snips_nlu.built_in_entities import BuiltInEntity, get_built_in_entities, \ + is_builtin_entity from snips_nlu.constants import ( INTENTS, ENTITIES, UTTERANCES, LANGUAGE, VALUE, AUTOMATICALLY_EXTENSIBLE, ENTITY, BUILTIN_PARSER, CUSTOM_ENGINE, MATCH_RANGE, DATA, SLOT_NAME, @@ -170,6 +171,8 @@ def get_intent_custom_entities(dataset, intent): def snips_nlu_entities(dataset): entities = dict() for entity_name, entity in dataset[ENTITIES].iteritems(): + if is_builtin_entity(entity_name): + continue entity_data = dict() use_synonyms = entity[USE_SYNONYMS] automatically_extensible = entity[AUTOMATICALLY_EXTENSIBLE] diff --git a/snips_nlu/slot_filler/data_augmentation.py b/snips_nlu/slot_filler/data_augmentation.py index a7334966d..9c68598b8 100644 --- a/snips_nlu/slot_filler/data_augmentation.py +++ b/snips_nlu/slot_filler/data_augmentation.py @@ -4,6 +4,7 @@ import numpy as np +from snips_nlu.built_in_entities import is_builtin_entity from snips_nlu.constants import (UTTERANCES, DATA, ENTITY, USE_SYNONYMS, SYNONYMS, VALUE, TEXT, INTENTS, ENTITIES) from snips_nlu.resources import get_subtitles @@ -17,10 +18,13 @@ def generate_utterance(contexts_iterator, entities_iterators, noise_iterator, for i, chunk in enumerate(context[DATA]): if ENTITY in chunk: has_entity = True - new_chunk = dict(chunk) - new_chunk[TEXT] = deepcopy( - next(entities_iterators[new_chunk[ENTITY]])) - context_data.append(new_chunk) + if not is_builtin_entity(chunk[ENTITY]): + new_chunk = dict(chunk) + new_chunk[TEXT] = deepcopy( + next(entities_iterators[new_chunk[ENTITY]])) + context_data.append(new_chunk) + else: + context_data.append(chunk) else: has_entity = False context_data.append(chunk) @@ -89,6 +93,7 @@ def augment_utterances(dataset, intent_name, language, max_utterances, noise_iterator = get_noise_iterator(language, min_noise_size, max_noise_size) intent_entities = get_intent_entities(dataset, intent_name) + intent_entities = [e for e in intent_entities if not is_builtin_entity(e)] entities_its = get_entities_iterators(dataset, intent_entities) generated_utterances = [] while nb_to_generate > 0: diff --git a/snips_nlu/tests/test_nlu_engine.py b/snips_nlu/tests/test_nlu_engine.py index d1b8e8c56..b9c71ec09 100644 --- a/snips_nlu/tests/test_nlu_engine.py +++ b/snips_nlu/tests/test_nlu_engine.py @@ -488,3 +488,35 @@ def test_parse_with_builtin_force_should_return_custom_when_overlapping( } self.assertEqual(results, expected_results) + + def test_engine_should_fit_with_builtins_entities(self): + # Given + language = Language.EN + dataset = validate_and_format_dataset({ + "intents": { + "dummy": { + ENGINE_TYPE: CUSTOM_ENGINE, + "utterances": [ + { + "data": [ + { + "text": "10p.m.", + "entity": "snips/datetime", + "slot_name": "startTime" + } + ] + } + ] + } + }, + "entities": { + "snips/datetime": {} + }, + "language": language.iso_code + }) + + # When / Then + # try: + SnipsNLUEngine(language).fit(dataset) + # except: + # self.fail("NLU engine should fit builtin") From c0a19e0a19532f5cbd4f15e0ce178683dc11b36f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Doumouro?= Date: Fri, 5 May 2017 16:50:34 +0200 Subject: [PATCH 2/2] Bump version number --- snips_nlu/__version__ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snips_nlu/__version__ b/snips_nlu/__version__ index 60a2d3e96..44bb5d1f7 100644 --- a/snips_nlu/__version__ +++ b/snips_nlu/__version__ @@ -1 +1 @@ -0.4.0 \ No newline at end of file +0.4.1 \ No newline at end of file