Skip to content

Commit

Permalink
Merge pull request #768 from snipsco/release/0.19.4
Browse files Browse the repository at this point in the history
Release 0.19.4
  • Loading branch information
adrienball authored Mar 6, 2019
2 parents 71f9599 + 532c840 commit 020906f
Show file tree
Hide file tree
Showing 15 changed files with 458 additions and 52 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Changelog
All notable changes to this project will be documented in this file.

## [0.19.4] - 2019-03-06
### Added
- Support for Portuguese: "pt_pt" and "pt_br"

### Changed
- Enhancement: leverage entity scopes of each intent in deterministic intent parser

## [0.19.3] - 2019-03-05
### Fixed
- Issue with intent classification reducing classification accuracy
Expand Down Expand Up @@ -243,6 +250,7 @@ several commands.
- Fix compiling issue with `bindgen` dependency when installing from source
- Fix issue in `CRFSlotFiller` when handling builtin entities

[0.19.4]: https://github.com/snipsco/snips-nlu/compare/0.19.3...0.19.4
[0.19.3]: https://github.com/snipsco/snips-nlu/compare/0.19.2...0.19.3
[0.19.2]: https://github.com/snipsco/snips-nlu/compare/0.19.1...0.19.2
[0.19.1]: https://github.com/snipsco/snips-nlu/compare/0.19.0...0.19.1
Expand Down
38 changes: 21 additions & 17 deletions docs/source/languages.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,26 @@ Snips NLU supports various languages, which are specified in the dataset in the
``"language"`` attribute. Here is the list of supported languages along with
their ISO codes:

+------------+------------+
| Language | ISO code |
+============+============+
| German | de |
+------------+------------+
| English | en |
+------------+------------+
| Spanish | es |
+------------+------------+
| French | fr |
+------------+------------+
| Italian | it |
+------------+------------+
| Japanese | ja |
+------------+------------+
| Korean | ko |
+------------+------------+
+-----------------+------------+
| Language | ISO code |
+=================+============+
| German | de |
+-----------------+------------+
| English | en |
+-----------------+------------+
| Spanish | es |
+-----------------+------------+
| French | fr |
+-----------------+------------+
| Italian | it |
+-----------------+------------+
| Japanese | ja |
+-----------------+------------+
| Korean | ko |
+-----------------+------------+
| Portuguese (BR) | pt_br |
+-----------------+------------+
| Portuguese (PT) | pt_pt |
+-----------------+------------+

Support for additional languages will come in the future, stay tuned :)
5 changes: 4 additions & 1 deletion sample_datasets/beverage_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@
"slot_name": "number_of_cups",
"text": "one"
},
{
"text": " "
},
{
"entity": "beverage_type",
"slot_name": "beverage_type",
Expand Down Expand Up @@ -136,4 +139,4 @@
}
},
"language": "en"
}
}
12 changes: 12 additions & 0 deletions sample_datasets/flights_dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@
"slot_name": "destination",
"text": "London"
},
{
"text": " "
},
{
"entity": "snips/datetime",
"slot_name": "flight_time",
Expand Down Expand Up @@ -87,6 +90,9 @@
"slot_name": "departure",
"text": "istanbul"
},
{
"text": " "
},
{
"entity": "snips/datetime",
"slot_name": "flight_time",
Expand All @@ -112,6 +118,9 @@
"slot_name": "destination",
"text": "chicago"
},
{
"text": " "
},
{
"entity": "snips/datetime",
"slot_name": "flight_datetime",
Expand Down Expand Up @@ -145,6 +154,9 @@
"slot_name": "flight_time",
"text": "this weekend"
},
{
"text": " "
},
{
"text": " ?"
}
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
"scikit-learn>=0.19,<0.20",
"sklearn-crfsuite>=0.3.6,<0.4",
"semantic_version>=2.6,<3.0",
"snips_nlu_utils>=0.7,<0.8",
"snips_nlu_parsers>=0.1,<0.2",
"snips-nlu-utils>=0.8,<0.9",
"snips-nlu-parsers>=0.2,<0.3",
"num2words>=0.5.6,<0.6",
"plac>=0.9.6,<1.0",
"requests>=2.0,<3.0",
Expand Down
2 changes: 1 addition & 1 deletion snips_nlu/__about__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
__email__ = "[email protected], [email protected]"
__license__ = "Apache License, Version 2.0"

__version__ = "0.19.3"
__version__ = "0.19.4"
__model_version__ = "0.19.0"

__download_url__ = "https://github.com/snipsco/snips-nlu-language-resources/releases/download"
Expand Down
2 changes: 2 additions & 0 deletions snips_nlu/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,5 @@
LANGUAGE_IT = "it"
LANGUAGE_JA = "ja"
LANGUAGE_KO = "ko"
LANGUAGE_PT_BR = "pt_br"
LANGUAGE_PT_PT = "pt_pt"
6 changes: 5 additions & 1 deletion snips_nlu/default_configs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,16 @@

from snips_nlu.constants import (
LANGUAGE_DE, LANGUAGE_EN, LANGUAGE_ES, LANGUAGE_FR, LANGUAGE_IT,
LANGUAGE_JA, LANGUAGE_KO)
LANGUAGE_JA, LANGUAGE_KO, LANGUAGE_PT_BR, LANGUAGE_PT_PT)
from .config_de import CONFIG as CONFIG_DE
from .config_en import CONFIG as CONFIG_EN
from .config_es import CONFIG as CONFIG_ES
from .config_fr import CONFIG as CONFIG_FR
from .config_it import CONFIG as CONFIG_IT
from .config_ja import CONFIG as CONFIG_JA
from .config_ko import CONFIG as CONFIG_KO
from .config_pt_br import CONFIG as CONFIG_PT_BR
from .config_pt_pt import CONFIG as CONFIG_PT_PT

DEFAULT_CONFIGS = {
LANGUAGE_DE: CONFIG_DE,
Expand All @@ -19,4 +21,6 @@
LANGUAGE_IT: CONFIG_IT,
LANGUAGE_JA: CONFIG_JA,
LANGUAGE_KO: CONFIG_KO,
LANGUAGE_PT_BR: CONFIG_PT_BR,
LANGUAGE_PT_PT: CONFIG_PT_PT,
}
170 changes: 170 additions & 0 deletions snips_nlu/default_configs/config_pt_br.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
from __future__ import unicode_literals

# Default NLU engine configuration registered for the "pt_br" language.
# It chains two intent parsers, in the order they are listed below.
CONFIG = {
    "unit_name": "nlu_engine",
    "intent_parsers_configs": [
        # First parser: deterministic intent parser.
        {
            "unit_name": "deterministic_intent_parser",
            "max_queries": 500,
            "max_pattern_length": 1000,
            "ignore_stop_words": True,
        },
        # Second parser: probabilistic intent parser, made of a CRF slot
        # filler and a logistic regression intent classifier.
        {
            "unit_name": "probabilistic_intent_parser",
            "slot_filler_config": {
                "unit_name": "crf_slot_filler",
                "feature_factory_configs": [
                    # Stemmed ngrams with n=1.
                    {
                        "args": {
                            "common_words_gazetteer_name":
                                "top_5000_words_stemmed",
                            "use_stemming": True,
                            "n": 1,
                        },
                        "factory_name": "ngram",
                        "offsets": [-2, -1, 0, 1, 2],
                    },
                    # Stemmed ngrams with n=2.
                    {
                        "args": {
                            "common_words_gazetteer_name":
                                "top_5000_words_stemmed",
                            "use_stemming": True,
                            "n": 2,
                        },
                        "factory_name": "ngram",
                        "offsets": [-2, 1],
                    },
                    # Argument-free factories.
                    {
                        "args": {},
                        "factory_name": "is_digit",
                        "offsets": [-1, 0, 1],
                    },
                    {
                        "args": {},
                        "factory_name": "is_first",
                        "offsets": [-2, -1, 0],
                    },
                    {
                        "args": {},
                        "factory_name": "is_last",
                        "offsets": [0, 1, 2],
                    },
                    # Shape ngrams for n = 1, 2 and 3.
                    {
                        "args": {"n": 1},
                        "factory_name": "shape_ngram",
                        "offsets": [0],
                    },
                    {
                        "args": {"n": 2},
                        "factory_name": "shape_ngram",
                        "offsets": [-1, 0],
                    },
                    {
                        "args": {"n": 3},
                        "factory_name": "shape_ngram",
                        "offsets": [-1],
                    },
                    # Entity matching factories; only the custom entity one
                    # carries a "drop_out" value.
                    {
                        "args": {
                            "use_stemming": True,
                            "tagging_scheme_code": 2,
                        },
                        "factory_name": "entity_match",
                        "offsets": [-2, -1, 0],
                        "drop_out": 0.5,
                    },
                    {
                        "args": {"tagging_scheme_code": 1},
                        "factory_name": "builtin_entity_match",
                        "offsets": [-2, -1, 0],
                    },
                ],
                "crf_args": {
                    "c1": 0.1,
                    "c2": 0.1,
                    "algorithm": "lbfgs",
                },
                "tagging_scheme": 1,
                "data_augmentation_config": {
                    "min_utterances": 200,
                    "capitalization_ratio": 0.2,
                    "add_builtin_entities_examples": True,
                },
                "random_seed": None,
            },
            "intent_classifier_config": {
                "unit_name": "log_reg_intent_classifier",
                "data_augmentation_config": {
                    "min_utterances": 20,
                    "noise_factor": 5,
                    "add_builtin_entities_examples": False,
                    "max_unknown_words": None,
                    "unknown_word_prob": 0.0,
                    "unknown_words_replacement_string": None,
                },
                "featurizer_config": {
                    "unit_name": "featurizer",
                    "pvalue_threshold": 0.4,
                    "added_cooccurrence_feature_ratio": 0.0,
                    "tfidf_vectorizer_config": {
                        "unit_name": "tfidf_vectorizer",
                        "use_stemming": True,
                        "word_clusters_name": None,
                    },
                    "cooccurrence_vectorizer_config": {
                        "unit_name": "cooccurrence_vectorizer",
                        "window_size": None,
                        "filter_stop_words": True,
                        "unknown_words_replacement_string": None,
                        "keep_order": True,
                    },
                },
                "random_seed": None,
            },
        },
    ],
}
Loading

0 comments on commit 020906f

Please sign in to comment.