diff --git a/CHANGELOG.md b/CHANGELOG.md index b6f238a59..4b72c467a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,20 @@ # Changelog All notable changes to this project will be documented in this file. +## [0.16.2] - 2018-08-08 +### Added +- `automatically_extensible` flag in dataset generation tool +- System requirements +- Reference to chatito tool in documentation + +### Changed +- Bump `snips-nlu-ontology` to `0.57.3` +- Versions of dependencies are now defined more loosely + +### Fixed +- Issue with synonyms mapping +- Issue with `snips-nlu download-all-languages` CLI command + ## [0.16.1] - 2018-07-23 ### Added - Every processing unit can be persisted into (and loaded from) a `bytearray` @@ -113,6 +127,7 @@ several commands. - Fix compiling issue with `bindgen` dependency when installing from source - Fix issue in `CRFSlotFiller` when handling builtin entities +[0.16.2]: https://github.com/snipsco/snips-nlu/compare/0.16.1...0.16.2 [0.16.1]: https://github.com/snipsco/snips-nlu/compare/0.16.0...0.16.1 [0.16.0]: https://github.com/snipsco/snips-nlu/compare/0.15.1...0.16.0 [0.15.1]: https://github.com/snipsco/snips-nlu/compare/0.15.0...0.15.1 diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst index a76bb5d25..01e3c1dc1 100644 --- a/CONTRIBUTORS.rst +++ b/CONTRIBUTORS.rst @@ -5,3 +5,4 @@ This is a list of everyone who has made significant contributions to Snips NLU, * `Alice Coucke `_ * `Josh Meyer `_ +* `Matthieu Brouillard `_ \ No newline at end of file diff --git a/README.rst b/README.rst index a45844046..7bf970a47 100644 --- a/README.rst +++ b/README.rst @@ -24,6 +24,13 @@ Snips NLU Check out our `blog post`_ to get more details about why we built Snips NLU and how it works under the hood. +System requirements +------------------- +- 64-bit Linux, MacOS >= 10.11, 64-bit Windows +- Python 2.7 or Python >= 3.4 +- RAM: Snips NLU will typically use between 100MB and 200MB of RAM, depending on the language and the size of the dataset. + + Installation ------------ diff --git a/docs/source/installation.rst b/docs/source/installation.rst index fa0622e49..806a0a1e2 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -3,6 +3,13 @@ Installation ============ +System requirements +------------------- +- 64-bit Linux, MacOS >= 10.11, 64-bit Windows +- Python 2.7 or Python >= 3.4 +- RAM: Snips NLU will typically use between 100MB and 200MB of RAM, depending on the language and the size of the dataset. + + Python Version -------------- diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 3d0ce5202..a2b16ef6b 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -29,9 +29,10 @@ parse as well as easy to read. We created a `sample dataset`_ that you can check to better understand the format. -You have two options to create your dataset. You can build it manually by -respecting the format used in the sample or alternatively you can use the -dataset creation CLI that is contained in the lib. +You have three options to create your dataset. You can build it manually by +respecting the format used in the sample, you can use the dataset creation +CLI included in the lib, or you can use `chatito`_, a DSL +tool for dataset generation. We will go for the second option here and start by creating three files corresponding to our three intents and one entity file corresponding to the @@ -102,6 +103,15 @@ double quotes ``"``.
If the value contains double quotes, it must be doubled to be escaped like this: ``"A value with a "","" in it"`` which corresponds to the actual value ``A value with a "," in it``. +.. Note:: + + By default, entities are generated as :ref:`automatically extensible `, i.e. the recognition will accept values other than the ones listed in the entity file. + This behavior can be changed by adding the following line at the beginning of the entity file: + + .. code-block:: bash + + # automatically_extensible=false + We are now ready to generate our dataset: .. code-block:: bash @@ -364,3 +374,4 @@ Alternatively, you can persist/load the engine as a ``bytearray``: .. _sample dataset: https://github.com/snipsco/snips-nlu/blob/master/snips_nlu_samples/sample_dataset.json .. _default configurations: https://github.com/snipsco/snips-nlu/blob/master/snips_nlu/default_configs .. _english one: https://github.com/snipsco/snips-nlu/blob/master/snips_nlu/default_configs/config_en.py +.. _chatito: https://github.com/rodrigopivi/Chatito diff --git a/setup.py b/setup.py index eabd2285b..9e6f947de 100644 --- a/setup.py +++ b/setup.py @@ -13,44 +13,38 @@ about = dict() exec(f.read(), about) - with io.open(os.path.join(root, "README.rst"), encoding="utf8") as f: readme = f.read() -nlu_metrics_version = "0.12.0" - required = [ - "enum34==1.1.6", - "pathlib==1.0.1", + "enum34>=1.1,<2.0", "numpy==1.14.0", - "scipy==1.0.0", - "scikit-learn==0.19.1", - "sklearn-crfsuite==0.3.6", - "semantic_version==2.6.0", - "snips_nlu_utils==0.6.1", - "snips_nlu_ontology==0.57.2", - "num2words==0.5.6", - "plac==0.9.6", - "requests==2.18.4" + "scipy>=1.0,<2.0", + "scikit-learn>=0.19,<0.20", + "sklearn-crfsuite>=0.3.6,<0.4", + "semantic_version>=2.6,<3.0", + "snips_nlu_utils>=0.6.1,<0.7", + "snips_nlu_ontology==0.57.3", + "num2words>=0.5.6,<0.6", + "plac>=0.9.6,<1.0", + "requests>=2.0,<3.0", + "pathlib==1.0.1; python_version < '3.4'", ] extras_require = { "doc": [ - "sphinx==1.7.1", - "sphinxcontrib-napoleon==0.6.1", - "sphinx-rtd-theme==0.2.4" + "sphinx>=1.7,<2.0", + "sphinxcontrib-napoleon>=0.6.1,<0.7", + "sphinx-rtd-theme>=0.2.4,<0.3" ], "metrics": [ - "snips_nlu_metrics==%s" % nlu_metrics_version, + "snips_nlu_metrics>=0.13,<0.14", ], "test": [ - "mock==2.0.0", - "snips_nlu_metrics==%s" % nlu_metrics_version, - "pylint==1.8.2", - "coverage==4.4.2" - ], - "integration_test": [ - "snips_nlu_metrics==%s" % nlu_metrics_version, + "mock>=2.0,<3.0", + "snips_nlu_metrics>=0.13,<0.14", + "pylint>=1.8,<2.0", + "coverage>=4.4.2,<5.0" ] } diff --git a/snips_nlu/__about__.py b/snips_nlu/__about__.py index 5c6c57e34..b1847715b 100644 --- a/snips_nlu/__about__.py +++ b/snips_nlu/__about__.py @@ -11,7 +11,7 @@ __email__ = "clement.doumouro@snips.ai, adrien.ball@snips.ai" __license__ = "Apache License, Version 2.0" -__version__ = "0.16.1" +__version__ = "0.16.2" __model_version__ = "0.16.0" __download_url__ = "https://github.com/snipsco/snips-nlu-language-resources/releases/download" diff --git a/snips_nlu/cli/dataset/entities.py b/snips_nlu/cli/dataset/entities.py index 07da20735..1ecec8744 100644 --- a/snips_nlu/cli/dataset/entities.py +++ b/snips_nlu/cli/dataset/entities.py @@ -2,6 +2,7 @@ from __future__ import unicode_literals import csv +import re from abc import ABCMeta, abstractmethod from pathlib import Path @@ -12,6 +13,7 @@ from snips_nlu.constants import ( VALUE, SYNONYMS, AUTOMATICALLY_EXTENSIBLE, USE_SYNONYMS, DATA) +AUTO_EXT_REGEX = re.compile(r'^#\sautomatically_extensible=(true|false)\s*$') class Entity(with_metaclass(ABCMeta,
object)): def __init__(self, name): @@ -56,17 +58,23 @@ def from_file(cls, filepath): if six.PY2: it = list(utf_8_encoder(it)) reader = csv.reader(list(it)) + autoextent = True for row in reader: if six.PY2: row = [cell.decode("utf-8") for cell in row] value = row[0] + if reader.line_num == 1: + m = AUTO_EXT_REGEX.match(row[0]) + if m: + autoextent = not m.group(1).lower() == 'false' + continue if len(row) > 1: synonyms = row[1:] else: synonyms = [] utterances.append(EntityUtterance(value, synonyms)) - return cls(entity_name, utterances, automatically_extensible=True, - use_synonyms=True) + return cls(entity_name, utterances, + automatically_extensible=autoextent, use_synonyms=True) @property def json(self): diff --git a/snips_nlu/cli/dataset/examples/entity_location_autoextent_false.txt b/snips_nlu/cli/dataset/examples/entity_location_autoextent_false.txt new file mode 100644 index 000000000..243c4d290 --- /dev/null +++ b/snips_nlu/cli/dataset/examples/entity_location_autoextent_false.txt @@ -0,0 +1,4 @@ +# automatically_extensible=false +new york,big apple +paris,city of lights +london \ No newline at end of file diff --git a/snips_nlu/cli/download.py b/snips_nlu/cli/download.py index b1b12613d..44e73f457 100644 --- a/snips_nlu/cli/download.py +++ b/snips_nlu/cli/download.py @@ -70,7 +70,7 @@ def download(resource_name, direct=False, def download_all_languages(*pip_args): """Download compatible resources for all supported languages""" for language in get_all_languages(): - download(language, *pip_args) + download(language, False, *pip_args) def _get_compatibility(): @@ -106,7 +106,7 @@ def _get_installed_languages(): for directory in DATA_PATH.iterdir(): if not directory.is_dir(): continue - with (directory / "metadata.json").open() as f: + with (directory / "metadata.json").open(encoding="utf8") as f: metadata = json.load(f) languages.add(metadata["language"]) return languages diff --git a/snips_nlu/cli/generate_dataset.py b/snips_nlu/cli/generate_dataset.py index aff954102..ffb0cea89 100644 --- a/snips_nlu/cli/generate_dataset.py +++ b/snips_nlu/cli/generate_dataset.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import print_function, unicode_literals import json diff --git a/snips_nlu/cli/inference.py b/snips_nlu/cli/inference.py index 24f62bbe1..cfcfe2819 100644 --- a/snips_nlu/cli/inference.py +++ b/snips_nlu/cli/inference.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import unicode_literals, print_function import json from builtins import input diff --git a/snips_nlu/cli/training.py b/snips_nlu/cli/training.py index a057d4423..1dd340bfa 100644 --- a/snips_nlu/cli/training.py +++ b/snips_nlu/cli/training.py @@ -1,4 +1,4 @@ -from __future__ import unicode_literals +from __future__ import unicode_literals, print_function import json from pathlib import Path diff --git a/snips_nlu/dataset.py b/snips_nlu/dataset.py index e9a627b8b..9df081ff1 100644 --- a/snips_nlu/dataset.py +++ b/snips_nlu/dataset.py @@ -1,9 +1,10 @@ from __future__ import division, unicode_literals import json -from builtins import str +from collections import Counter from copy import deepcopy +from builtins import str from future.utils import iteritems, itervalues from snips_nlu_ontology import get_all_languages @@ -97,16 +98,23 @@ def has_any_capitalization(entity_utterances, language): return False -def add_variation_if_needed(utterances, variation, utterance, language): - if not variation: - return utterances - all_variations = 
get_string_variations(variation, language) - for v in all_variations: - if v not in utterances: - utterances[v] = utterance +def add_entity_variations(utterances, entity_variations, entity_value): + utterances[entity_value] = entity_value + for variation in entity_variations[entity_value]: + if variation: + utterances[variation] = entity_value return utterances +def _extract_entity_values(entity): + values = set() + for ent in entity[DATA]: + values.add(ent[VALUE]) + if entity[USE_SYNONYMS]: + values.update(set(ent[SYNONYMS])) + return values + + def validate_and_format_custom_entity(entity, queries_entities, language): validate_type(entity, dict) mandatory_keys = [USE_SYNONYMS, AUTOMATICALLY_EXTENSIBLE, DATA] @@ -139,33 +147,59 @@ def validate_and_format_custom_entity(entity, queries_entities, language): formatted_entity[CAPITALIZE] = has_any_capitalization(queries_entities, language) - # Normalize - validated_data = dict() - for entry in entity[DATA]: - entry_value = entry[VALUE] - validated_data = add_variation_if_needed( - validated_data, entry_value, entry_value, language) - + validated_utterances = dict() + # Map original values and synonyms + for data in entity[DATA]: + ent_value = data[VALUE] + if not ent_value: + continue + validated_utterances[ent_value] = ent_value if use_synonyms: - for s in entry[SYNONYMS]: - validated_data = add_variation_if_needed( - validated_data, s, entry_value, language) - - formatted_entity[UTTERANCES] = validated_data - # Merge queries_entities - for value in queries_entities: - formatted_entity = add_entity_value_if_missing( - value, formatted_entity, language) + for s in data[SYNONYMS]: + if s and s not in validated_utterances: + validated_utterances[s] = ent_value + + # Add variations if not colliding + all_original_values = _extract_entity_values(entity) + variations = dict() + for data in entity[DATA]: + ent_value = data[VALUE] + values_to_variate = {ent_value} + if use_synonyms: + values_to_variate.update(set(data[SYNONYMS])) + variations[ent_value] = set( + v for value in values_to_variate + for v in get_string_variations(value, language)) + variation_counter = Counter( + [v for vars in itervalues(variations) for v in vars]) + non_colliding_variations = { + value: [ + v for v in variations if + v not in all_original_values and variation_counter[v] == 1 + ] + for value, variations in iteritems(variations) + } + for entry in entity[DATA]: + entry_value = entry[VALUE] + validated_utterances = add_entity_variations( + validated_utterances, non_colliding_variations, entry_value) + + # Merge queries entities + queries_entities_variations = { + ent: get_string_variations(ent, language) for ent in queries_entities + } + for original_ent, variations in iteritems(queries_entities_variations): + if not original_ent or original_ent in validated_utterances: + continue + validated_utterances[original_ent] = original_ent + for variation in variations: + if variation and variation not in validated_utterances: + validated_utterances[variation] = original_ent + formatted_entity[UTTERANCES] = validated_utterances return formatted_entity def validate_and_format_builtin_entity(entity, queries_entities): validate_type(entity, dict) return {UTTERANCES: set(queries_entities)} - - -def add_entity_value_if_missing(value, entity, language): - entity[UTTERANCES] = add_variation_if_needed(entity[UTTERANCES], value, - value, language) - return entity diff --git a/snips_nlu/intent_classifier/log_reg_classifier.py b/snips_nlu/intent_classifier/log_reg_classifier.py index
276429e1f..2ebd19112 100644 --- a/snips_nlu/intent_classifier/log_reg_classifier.py +++ b/snips_nlu/intent_classifier/log_reg_classifier.py @@ -185,7 +185,7 @@ def from_path(cls, path): raise OSError("Missing intent classifier model file: %s" % model_path.name) - with model_path.open() as f: + with model_path.open(encoding="utf8") as f: model_dict = json.load(f) return cls.from_dict(model_dict) diff --git a/snips_nlu/intent_parser/deterministic_intent_parser.py b/snips_nlu/intent_parser/deterministic_intent_parser.py index 9d5767777..e6aeb7bb1 100644 --- a/snips_nlu/intent_parser/deterministic_intent_parser.py +++ b/snips_nlu/intent_parser/deterministic_intent_parser.py @@ -206,7 +206,7 @@ def from_path(cls, path): raise OSError("Missing deterministic intent parser metadata file: " "%s" % metadata_path.name) - with metadata_path.open() as f: + with metadata_path.open(encoding="utf8") as f: metadata = json.load(f) return cls.from_dict(metadata) diff --git a/snips_nlu/intent_parser/probabilistic_intent_parser.py b/snips_nlu/intent_parser/probabilistic_intent_parser.py index d05f719c4..63c4b86d5 100644 --- a/snips_nlu/intent_parser/probabilistic_intent_parser.py +++ b/snips_nlu/intent_parser/probabilistic_intent_parser.py @@ -170,7 +170,7 @@ def from_path(cls, path): raise OSError("Missing probabilistic intent parser model file: " "%s" % model_path.name) - with model_path.open() as f: + with model_path.open(encoding="utf8") as f: model = json.load(f) parser = cls(config=cls.config_type.from_dict(model["config"])) diff --git a/snips_nlu/nlu_engine/nlu_engine.py b/snips_nlu/nlu_engine/nlu_engine.py index 28bab524f..8420592d8 100644 --- a/snips_nlu/nlu_engine/nlu_engine.py +++ b/snips_nlu/nlu_engine/nlu_engine.py @@ -212,7 +212,7 @@ def from_path(cls, path): raise OSError("Missing nlu engine model file: %s" % model_path.name) - with model_path.open() as f: + with model_path.open(encoding="utf8") as f: model = json.load(f) model_version = model.get("model_version") if model_version is None or model_version != __model_version__: diff --git a/snips_nlu/pipeline/processing_unit.py b/snips_nlu/pipeline/processing_unit.py index d003ea73a..d4ee1c90a 100644 --- a/snips_nlu/pipeline/processing_unit.py +++ b/snips_nlu/pipeline/processing_unit.py @@ -140,7 +140,7 @@ def load_processing_unit(unit_path): """Load a :class:`ProcessingUnit` from a persisted processing unit directory""" unit_path = Path(unit_path) - with (unit_path / "metadata.json").open() as f: + with (unit_path / "metadata.json").open(encoding="utf8") as f: metadata = json.load(f) unit = _get_unit_type(metadata["unit_name"]) return unit.from_path(unit_path) diff --git a/snips_nlu/resources.py b/snips_nlu/resources.py index 03659157c..1bd2aa80c 100644 --- a/snips_nlu/resources.py +++ b/snips_nlu/resources.py @@ -50,7 +50,7 @@ def load_resources(name): def load_resources_from_dir(resources_dir): - with (resources_dir / "metadata.json").open() as f: + with (resources_dir / "metadata.json").open(encoding="utf8") as f: metadata = json.load(f) language = metadata["language"] if language in _RESOURCES: @@ -78,7 +78,7 @@ def load_resources_from_dir(resources_dir): def get_resources_sub_directory(resources_dir): resources_dir = Path(resources_dir) - with (resources_dir / "metadata.json").open() as f: + with (resources_dir / "metadata.json").open(encoding="utf8") as f: metadata = json.load(f) resource_name = metadata["name"] version = metadata["version"] diff --git a/snips_nlu/slot_filler/crf_slot_filler.py b/snips_nlu/slot_filler/crf_slot_filler.py 
index e3aeba2d2..5ebd9a763 100644 --- a/snips_nlu/slot_filler/crf_slot_filler.py +++ b/snips_nlu/slot_filler/crf_slot_filler.py @@ -346,7 +346,7 @@ def from_path(cls, path): raise OSError("Missing slot filler model file: %s" % model_path.name) - with model_path.open() as f: + with model_path.open(encoding="utf8") as f: model = json.load(f) slot_filler_config = cls.config_type.from_dict(model["config"]) diff --git a/snips_nlu/tests/test_cli.py b/snips_nlu/tests/test_cli.py index 8d58e99d7..3cba2e5a6 100644 --- a/snips_nlu/tests/test_cli.py +++ b/snips_nlu/tests/test_cli.py @@ -23,6 +23,7 @@ class TestCLI(SnipsTest): # pylint: disable=protected-access def setUp(self): + super(TestCLI, self).setUp() if not self.fixture_dir.exists(): self.fixture_dir.mkdir() @@ -181,6 +182,40 @@ def test_should_generate_entity_from_file(self): } self.assertDictEqual(expected_entity_dict, entity_dict) + def test_should_generate_entity_from_file_with_autoextensible(self): + # Given + examples_path = PACKAGE_PATH / "cli" / "dataset" / "examples" + entity_file = examples_path / "entity_location_autoextent_false.txt" + + # When + entity_dataset = CustomEntity.from_file(entity_file) + entity_dict = entity_dataset.json + + # Then + expected_entity_dict = { + "automatically_extensible": False, + "data": [ + { + "synonyms": [ + "big apple" + ], + "value": "new york" + }, + { + "synonyms": [ + "city of lights" + ], + "value": "paris" + }, + { + "synonyms": [], + "value": "london" + } + ], + "use_synonyms": True + } + self.assertDictEqual(expected_entity_dict, entity_dict) + def test_should_generate_dataset_from_files(self): # Given examples_path = PACKAGE_PATH / "cli" / "dataset" / "examples" diff --git a/snips_nlu/tests/test_dataset.py b/snips_nlu/tests/test_dataset.py index 320190130..1db610a3a 100644 --- a/snips_nlu/tests/test_dataset.py +++ b/snips_nlu/tests/test_dataset.py @@ -2,7 +2,6 @@ from __future__ import unicode_literals from builtins import str - from mock import mock from snips_nlu.constants import ( @@ -37,8 +36,8 @@ def test_missing_intent_key_should_raise_exception(self): # When/Then with self.assertRaises(KeyError) as ctx: validate_and_format_dataset(dataset) - self.assertEqual(str(ctx.exception.args[0]), - "Expected chunk to have key: 'slot_name'") + self.assertEqual("Expected chunk to have key: 'slot_name'", + str(ctx.exception.args[0])) def test_unknown_entity_should_raise_exception(self): # Given @@ -72,8 +71,7 @@ def test_unknown_entity_should_raise_exception(self): # When/Then with self.assertRaises(KeyError) as ctx: validate_and_format_dataset(dataset) - self.assertEqual(str(ctx.exception.args[0]), - "Expected entities to have key: 'unknown_entity'") + self.assertEqual("Expected entities to have key: 'unknown_entity'", str(ctx.exception.args[0])) def test_missing_entity_key_should_raise_exception(self): # Given @@ -92,8 +90,7 @@ def test_missing_entity_key_should_raise_exception(self): # When/Then with self.assertRaises(KeyError) as ctx: validate_and_format_dataset(dataset) - self.assertEqual(str(ctx.exception.args[0]), - "Expected entity to have key: 'use_synonyms'") + self.assertEqual("Expected entity to have key: 'use_synonyms'", str(ctx.exception.args[0])) def test_invalid_language_should_raise_exception(self): # Given @@ -107,7 +104,7 @@ def test_invalid_language_should_raise_exception(self): # When/Then with self.assertRaises(ValueError) as ctx: validate_and_format_dataset(dataset) - self.assertEqual(str(ctx.exception.args[0]), "Unknown language: 'eng'") + self.assertEqual("Unknown 
language: 'eng'", str(ctx.exception.args[0])) @mock.patch("snips_nlu.dataset.get_string_variations") def test_should_format_dataset_by_adding_synonyms( @@ -158,7 +155,7 @@ def mock_get_string_variations(variation, language): dataset = validate_and_format_dataset(dataset) # Then - self.assertDictEqual(dataset, expected_dataset) + self.assertDictEqual(expected_dataset, dataset) @mock.patch("snips_nlu.dataset.get_string_variations") def test_should_format_dataset_by_adding_entity_values( @@ -269,7 +266,7 @@ def mock_get_string_variations(variation, language): dataset = validate_and_format_dataset(dataset) # Then - self.assertEqual(dataset, expected_dataset) + self.assertEqual(expected_dataset, dataset) @mock.patch("snips_nlu.dataset.get_string_variations") def test_should_add_missing_reference_entity_values_when_not_use_synonyms( @@ -377,7 +374,7 @@ def mock_get_string_variations(variation, language): dataset = validate_and_format_dataset(dataset) # Then - self.assertEqual(dataset, expected_dataset) + self.assertEqual(expected_dataset, dataset) def test_should_not_require_data_for_builtin_entities(self): # Given @@ -521,7 +518,7 @@ def mock_get_string_variations(variation, language): dataset = validate_and_format_dataset(dataset) # Then - self.assertEqual(dataset, expected_dataset) + self.assertEqual(expected_dataset, dataset) @mock.patch("snips_nlu.dataset.get_string_variations") def test_should_add_capitalize_field( @@ -691,7 +688,7 @@ def mock_get_string_variations(variation, language): dataset = validate_and_format_dataset(dataset) # Then - self.assertDictEqual(dataset, expected_dataset) + self.assertDictEqual(expected_dataset, dataset) @mock.patch("snips_nlu.dataset.get_string_variations") def test_should_normalize_synonyms( @@ -749,6 +746,7 @@ def mock_get_string_variations(variation, language): "utterances": { "ëntity": "ëNtity", "Ëntity": "ëNtity", + "ëNtity": "ëNtity" }, "automatically_extensible": True, "capitalize": False @@ -763,7 +761,7 @@ def mock_get_string_variations(variation, language): dataset = validate_and_format_dataset(dataset) # Then - self.assertDictEqual(dataset, expected_dataset) + self.assertDictEqual(expected_dataset, dataset) @mock.patch("snips_nlu.dataset.get_string_variations") def test_dataset_should_handle_synonyms( @@ -809,3 +807,65 @@ def mock_get_string_variations(variation, language): # Then self.assertDictEqual(dataset[ENTITIES], expected_entities) + + def test_should_not_avoid_synomyms_variations_collision(self): + # Given + dataset = { + "intents": { + "dummy_but_tricky_intent": { + "utterances": [ + { + "data": [ + { + "text": "dummy_value", + "entity": "dummy_but_tricky_entity", + "slot_name": "dummy_but_tricky_slot" + } + ] + } + ] + } + }, + "entities": { + "dummy_but_tricky_entity": { + "data": [ + { + "value": "a", + "synonyms": [ + "favorïte" + ] + }, + { + "value": "b", + "synonyms": [ + "favorite" + ] + } + ], + "use_synonyms": True, + "automatically_extensible": False + } + }, + "language": "en", + "snips_nlu_version": "0.15.0" + } + + # When + dataset = validate_and_format_dataset(dataset) + + # Then + entity = dataset["entities"]["dummy_but_tricky_entity"] + expected_utterances = { + "A": "a", + "B": "b", + "DummyValue": "dummy_value", + "Dummy_Value": "dummy_value", + "Favorïte": "a", + "a": "a", + "b": "b", + "dummy_value": "dummy_value", + "dummyvalue": "dummy_value", + "favorite": "b", + "favorïte": "a" + } + self.assertDictEqual(expected_utterances, entity["utterances"]) diff --git a/snips_nlu/tests/utils.py b/snips_nlu/tests/utils.py 
index 4ea44722f..3d3db3061 100644 --- a/snips_nlu/tests/utils.py +++ b/snips_nlu/tests/utils.py @@ -23,8 +23,7 @@ class SnipsTest(TestCase): - def __init__(self, methodName='runTest'): - super(SnipsTest, self).__init__(methodName) + def setUp(self): for l in get_all_languages(): load_resources(l) @@ -39,14 +38,14 @@ def fail_if_exception(self, msg): def assertJsonContent(self, json_path, expected_dict): if not json_path.exists(): self.fail("Json file not found: %s" % str(json_path)) - with json_path.open() as f: + with json_path.open(encoding="utf8") as f: data = json.load(f) self.assertDictEqual(expected_dict, data) def assertFileContent(self, path, expected_content): if not path.exists(): self.fail("File not found: %s" % str(path)) - with path.open() as f: + with path.open(encoding="utf8") as f: data = f.read() self.assertEqual(expected_content, data) @@ -68,6 +67,7 @@ class FixtureTest(SnipsTest): # pylint: disable=protected-access def setUp(self): + super(FixtureTest, self).setUp() if not self.fixture_dir.exists(): self.fixture_dir.mkdir() diff --git a/tox.ini b/tox.ini index 442d3e892..37ccd1282 100644 --- a/tox.ini +++ b/tox.ini @@ -31,7 +31,7 @@ setenv= basepython = python3.6 skip_install = true commands = - pip install -e ".[test,integration_test]" + pip install -e ".[test]" snips-nlu download snips_nlu_de-0.2.0 --direct snips-nlu download snips_nlu_en-0.2.0 --direct snips-nlu download snips_nlu_es-0.2.0 --direct
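
Usage sketch for the new entity-file header introduced above (illustrative only: it assumes `CustomEntity` is importable from `snips_nlu.cli.dataset.entities` and uses a hypothetical `location.txt` file; it mirrors the behaviour covered by `test_should_generate_entity_from_file_with_autoextensible`):

# Illustrative sketch: opting out of automatic extension for a custom entity.
# Assumptions: CustomEntity lives in snips_nlu.cli.dataset.entities and
# "location.txt" is a hypothetical entity file created for this example.
import io
from pathlib import Path

from snips_nlu.cli.dataset.entities import CustomEntity  # assumed import path

entity_file = Path("location.txt")
with io.open(str(entity_file), "w", encoding="utf8") as f:
    # The header must be the first line; AUTO_EXT_REGEX expects exactly one
    # space between '#' and 'automatically_extensible'.
    f.write("# automatically_extensible=false\n"
            "new york,big apple\n"
            "paris,city of lights\n"
            "london\n")

entity = CustomEntity.from_file(entity_file)
print(entity.json["automatically_extensible"])  # False
print(entity.json["use_synonyms"])               # True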