Skip to content

Commit

Permalink
Merge pull request #214 from snipsco/release/0.4.0
Browse files Browse the repository at this point in the history
Properly handle builtin entities
  • Loading branch information
ClemDoum authored May 5, 2017
2 parents e3303d7 + c0a19e0 commit 22ac674
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 10 deletions.
2 changes: 1 addition & 1 deletion snips_nlu/__version__
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.4.0
0.4.1
2 changes: 1 addition & 1 deletion snips_nlu/built_in_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,5 +106,5 @@ def clear_cache():
_DUCKLING_CACHE.clear()


def is_built_in_entity(entity_label):
def is_builtin_entity(entity_label):
    """Tell whether ``entity_label`` designates a builtin entity.

    A label is considered builtin when it is found in
    ``BuiltInEntity.built_in_entity_by_label``.
    """
    known_labels = BuiltInEntity.built_in_entity_by_label
    return entity_label in known_labels
4 changes: 2 additions & 2 deletions snips_nlu/dataset.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import re
from copy import deepcopy

from snips_nlu.built_in_entities import BuiltInEntity, is_built_in_entity
from snips_nlu.built_in_entities import BuiltInEntity, is_builtin_entity
from snips_nlu.constants import (TEXT, USE_SYNONYMS, SYNONYMS, DATA, INTENTS,
ENTITIES, ENTITY, SLOT_NAME, UTTERANCES,
LANGUAGE, VALUE, AUTOMATICALLY_EXTENSIBLE,
Expand All @@ -24,7 +24,7 @@ def validate_and_format_dataset(dataset):
entities = set()
for entity_name, entity in dataset[ENTITIES].iteritems():
entities.add(entity_name)
if is_built_in_entity(entity_name):
if is_builtin_entity(entity_name):
validate_entity = validate_and_format_builtin_entity
else:
validate_entity = validate_and_format_custom_entity
Expand Down
4 changes: 3 additions & 1 deletion snips_nlu/intent_parser/regex_intent_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import re

from snips_nlu.built_in_entities import BuiltInEntity
from snips_nlu.built_in_entities import BuiltInEntity, is_builtin_entity
from snips_nlu.constants import (TEXT, USE_SYNONYMS, SYNONYMS, DATA, INTENTS,
ENTITIES, SLOT_NAME, UTTERANCES, VALUE,
ENTITY, CUSTOM_ENGINE)
Expand Down Expand Up @@ -80,6 +80,8 @@ def generate_regexes(intent_queries, joined_entity_utterances,
def get_joined_entity_utterances(dataset):
joined_entity_utterances = dict()
for entity_name, entity in dataset[ENTITIES].iteritems():
if is_builtin_entity(entity_name):
continue
if entity[USE_SYNONYMS]:
utterances = [syn for entry in entity[DATA]
for syn in entry[SYNONYMS]]
Expand Down
5 changes: 4 additions & 1 deletion snips_nlu/nlu_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
from itertools import groupby, permutations

from dataset import validate_and_format_dataset, filter_dataset
from snips_nlu.built_in_entities import BuiltInEntity, get_built_in_entities
from snips_nlu.built_in_entities import BuiltInEntity, get_built_in_entities, \
is_builtin_entity
from snips_nlu.constants import (
INTENTS, ENTITIES, UTTERANCES, LANGUAGE, VALUE, AUTOMATICALLY_EXTENSIBLE,
ENTITY, BUILTIN_PARSER, CUSTOM_ENGINE, MATCH_RANGE, DATA, SLOT_NAME,
Expand Down Expand Up @@ -170,6 +171,8 @@ def get_intent_custom_entities(dataset, intent):
def snips_nlu_entities(dataset):
entities = dict()
for entity_name, entity in dataset[ENTITIES].iteritems():
if is_builtin_entity(entity_name):
continue
entity_data = dict()
use_synonyms = entity[USE_SYNONYMS]
automatically_extensible = entity[AUTOMATICALLY_EXTENSIBLE]
Expand Down
13 changes: 9 additions & 4 deletions snips_nlu/slot_filler/data_augmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np

from snips_nlu.built_in_entities import is_builtin_entity
from snips_nlu.constants import (UTTERANCES, DATA, ENTITY, USE_SYNONYMS,
SYNONYMS, VALUE, TEXT, INTENTS, ENTITIES)
from snips_nlu.resources import get_subtitles
Expand All @@ -17,10 +18,13 @@ def generate_utterance(contexts_iterator, entities_iterators, noise_iterator,
for i, chunk in enumerate(context[DATA]):
if ENTITY in chunk:
has_entity = True
new_chunk = dict(chunk)
new_chunk[TEXT] = deepcopy(
next(entities_iterators[new_chunk[ENTITY]]))
context_data.append(new_chunk)
if not is_builtin_entity(chunk[ENTITY]):
new_chunk = dict(chunk)
new_chunk[TEXT] = deepcopy(
next(entities_iterators[new_chunk[ENTITY]]))
context_data.append(new_chunk)
else:
context_data.append(chunk)
else:
has_entity = False
context_data.append(chunk)
Expand Down Expand Up @@ -89,6 +93,7 @@ def augment_utterances(dataset, intent_name, language, max_utterances,
noise_iterator = get_noise_iterator(language, min_noise_size,
max_noise_size)
intent_entities = get_intent_entities(dataset, intent_name)
intent_entities = [e for e in intent_entities if not is_builtin_entity(e)]
entities_its = get_entities_iterators(dataset, intent_entities)
generated_utterances = []
while nb_to_generate > 0:
Expand Down
32 changes: 32 additions & 0 deletions snips_nlu/tests/test_nlu_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -488,3 +488,35 @@ def test_parse_with_builtin_force_should_return_custom_when_overlapping(
}

self.assertEqual(results, expected_results)

def test_engine_should_fit_with_builtins_entities(self):
    # Given: a single intent whose only utterance is one chunk tagged
    # with the "snips/datetime" entity, declared with an empty
    # definition in the dataset's entities section.
    language = Language.EN
    builtin_chunk = {
        "text": "10p.m.",
        "entity": "snips/datetime",
        "slot_name": "startTime",
    }
    dummy_intent = {
        ENGINE_TYPE: CUSTOM_ENGINE,
        "utterances": [{"data": [builtin_chunk]}],
    }
    raw_dataset = {
        "intents": {"dummy": dummy_intent},
        "entities": {"snips/datetime": {}},
        "language": language.iso_code,
    }
    dataset = validate_and_format_dataset(raw_dataset)

    # When / Then: fitting only has to complete. No explicit assertion
    # is needed, since any exception raised here is reported by the
    # test runner against this test.
    engine = SnipsNLUEngine(language)
    engine.fit(dataset)

0 comments on commit 22ac674

Please sign in to comment.