From f427a5e7a9bfa4c6d91153e1910018f818964b1f Mon Sep 17 00:00:00 2001 From: Alex Garel Date: Thu, 24 Oct 2024 14:26:38 +0200 Subject: [PATCH] feat: handling synonyms and text fields more efficiently (#234) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Using synonyms capabilities of ES to avoid storing taxonomies fields in the index * Better handling of full text queries that support them within any expression * make boost_phrase a separate parameter * Raise errors if the query is not well understood or does not pass some sanity checks * Use main translation of taxonomy for facets values (instead of a random synonym) * Better handling of global config to avoid treacherous patterns * Unify parameters for Get and Post (better use of pydantic) * Error on extraneous search parameters to avoid hard to debug issues with typos * Add a command to clean indexes * Integration tests on search and analyzers Part of: #193 --------- Co-authored-by: Raphaël Bournhonesque --- .pre-commit-config.yaml | 7 + Dockerfile | 14 +- Makefile | 34 +- app/_import.py | 107 ++-- app/_types.py | 510 ++++++++++++------ app/api.py | 91 +--- app/charts.py | 10 +- app/cli/main.py | 100 +++- app/config.py | 176 +++--- app/es_query_builder.py | 187 +++++++ app/exceptions.py | 35 ++ app/facets.py | 25 +- app/indexing.py | 263 ++++----- app/openfoodfacts.py | 43 +- app/postprocessing.py | 24 +- app/query.py | 259 ++++----- app/query_transformers.py | 218 ++++++++ app/search.py | 87 ++- app/taxonomy.py | 16 +- app/taxonomy_es.py | 94 +++- app/utils/analyzers.py | 192 ++++++- app/utils/io.py | 25 + app/validations.py | 8 +- data/config/openfoodfacts.yml | 102 ++-- docker-compose.yml | 5 +- docker/prod.yml | 3 + docs/users/explain-configuration.md | 53 +- docs/users/explain-taxonomies.md | 65 +++ docs/users/ref-web-components.md | 3 +- frontend/public/off.html | 6 +- frontend/src/mixins/search-ctl.ts | 25 +- frontend/src/search-chart.ts | 4 +- 
frontend/src/test/search-bar_test.ts | 29 +- poetry.lock | 98 +++- pyproject.toml | 5 +- scripts/Dockerfile.schema | 2 +- tests/cli_utils.py | 24 + tests/conftest.py | 27 +- tests/int/__init__.py | 0 tests/int/conftest.py | 87 +++ tests/int/data/test_categories.full.json | 1 + tests/int/data/test_labels.full.json | 1 + tests/int/data/test_off.yml | 67 +++ tests/int/data/test_off_data.json | 284 ++++++++++ tests/int/data/test_off_data.jsonl | 9 + tests/int/data/test_off_data_update.jsonl | 2 + tests/int/data_generation.py | 152 ++++++ tests/int/helpers.py | 74 +++ tests/int/test_analyze.py | 114 ++++ tests/int/test_import_data.py | 160 ++++++ tests/int/test_import_taxonomies.py | 32 ++ tests/int/test_search.py | 454 ++++++++++++++++ tests/unit/data/complex_query.json | 224 +++----- tests/unit/data/empty_query_with_sort_by.json | 2 - .../empty_query_with_sort_by_and_facets.json | 14 +- .../unit/data/non_existing_filter_field.json | 39 +- tests/unit/data/non_existing_subfield.json | 36 ++ tests/unit/data/open_range.json | 42 ++ tests/unit/data/openfoodfacts_config.yml | 105 ++-- tests/unit/data/simple_filter_query.json | 40 +- tests/unit/data/simple_full_text_query.json | 104 ++-- .../data/simple_full_text_query_facets.json | 118 ++-- tests/unit/data/sort_by_query.json | 62 +-- tests/unit/data/test_open_range.json | 42 ++ .../data/wildcard_in_phrase_is_legit.json | 53 ++ tests/unit/test__import.py | 14 +- tests/unit/test_config.py | 21 + tests/unit/test_indexing.py | 87 +-- tests/unit/test_query.py | 255 ++++++--- 69 files changed, 4242 insertions(+), 1429 deletions(-) create mode 100644 app/es_query_builder.py create mode 100644 app/exceptions.py create mode 100644 app/query_transformers.py create mode 100644 docs/users/explain-taxonomies.md create mode 100644 tests/cli_utils.py create mode 100644 tests/int/__init__.py create mode 100644 tests/int/conftest.py create mode 100644 tests/int/data/test_categories.full.json create mode 100644 
tests/int/data/test_labels.full.json create mode 100644 tests/int/data/test_off.yml create mode 100644 tests/int/data/test_off_data.json create mode 100644 tests/int/data/test_off_data.jsonl create mode 100644 tests/int/data/test_off_data_update.jsonl create mode 100644 tests/int/data_generation.py create mode 100644 tests/int/helpers.py create mode 100644 tests/int/test_analyze.py create mode 100644 tests/int/test_import_data.py create mode 100644 tests/int/test_import_taxonomies.py create mode 100644 tests/int/test_search.py create mode 100644 tests/unit/data/non_existing_subfield.json create mode 100644 tests/unit/data/open_range.json create mode 100644 tests/unit/data/test_open_range.json create mode 100644 tests/unit/data/wildcard_in_phrase_is_legit.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4889b0e7..536b2805 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,11 @@ repos: # Note for all linters: do not forget to update pyproject.toml when updating version. 
+ - repo: https://github.com/python-poetry/poetry + rev: 1.8.4 + hooks: + - id: poetry-lock + args: ["--check"] + - repo: https://github.com/psf/black-pre-commit-mirror rev: 24.8.0 hooks: @@ -15,6 +21,7 @@ repos: rev: 5.13.2 hooks: - id: isort + - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.11.1 hooks: diff --git a/Dockerfile b/Dockerfile index ab7830f8..edfcdb57 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,7 +29,10 @@ ENV PYTHONUNBUFFERED=1 \ FROM python-base as builder-base RUN curl -sSL https://install.python-poetry.org | python3 - WORKDIR $PYSETUP_PATH -COPY poetry.lock pyproject.toml ./ +# we need README.md for poetry check +COPY poetry.lock pyproject.toml README.md ./ +RUN poetry check --lock || \ + ( echo "Poetry.lock is outdated, please run make update_poetry_lock" && false ) RUN poetry install --without dev # This is our final image @@ -40,6 +43,10 @@ COPY --from=builder-base $POETRY_HOME $POETRY_HOME RUN poetry config virtualenvs.create false ENV POETRY_VIRTUALENVS_IN_PROJECT=false +# create some folders, to later ensure right ownership +RUN mkdir -p /opt/search/data && \ + mkdir -p /opt/search/synonyms + # create off user ARG USER_UID ARG USER_GID @@ -66,8 +73,11 @@ CMD ["uvicorn", "app.api:app", "--proxy-headers", "--host", "0.0.0.0", "--port", # ---------------------- FROM builder-base as builder-dev WORKDIR $PYSETUP_PATH -COPY poetry.lock pyproject.toml ./ +# we need README.md for poetry check +COPY poetry.lock pyproject.toml README.md ./ # full install, with dev packages +RUN poetry check --lock || \ + ( echo "Poetry.lock is outdated, please run make update_poetry_lock" && false ) RUN poetry install # image with dev tooling diff --git a/Makefile b/Makefile index 2bd6409b..795f1fd8 100644 --- a/Makefile +++ b/Makefile @@ -22,6 +22,7 @@ endif DOCKER_COMPOSE=docker compose --env-file=${ENV_FILE} DOCKER_COMPOSE_TEST=COMPOSE_PROJECT_NAME=search_test docker compose --env-file=${ENV_FILE} +.PHONY: build create_external_volumes livecheck up 
down test test_front test_front_watch test_api import-dataset import-taxonomies sync-scripts build-translations generate-openapi check check_front check_translations lint lint_back lint_front #------------# # Production # #------------# @@ -58,7 +59,7 @@ livecheck: build: @echo "🔎 building docker (for dev)" - ${DOCKER_COMPOSE} build --progress=plain + ${DOCKER_COMPOSE} build --progress=plain ${args} up: _ensure_network @@ -107,15 +108,34 @@ tsc_watch: @echo "🔎 Running front-end tsc in watch mode..." ${DOCKER_COMPOSE} run --rm search_nodejs npm run build:watch +update_poetry_lock: + @echo "🔎 Updating poetry.lock" + ${DOCKER_COMPOSE} run --rm api poetry lock --no-update + #-------# # Tests # #-------# -test: _ensure_network test_api test_front +test: _ensure_network check_poetry_lock test_api test_front + +check_poetry_lock: + @echo "🔎 Checking poetry.lock" +# we have to mount whole project folder for pyproject will be checked + ${DOCKER_COMPOSE} run -v $$(pwd):/project -w /project --rm api poetry check --lock + +test_api: test_api_unit test_api_integration + +test_api_unit: + @echo "🔎 Running API unit tests..." + ${DOCKER_COMPOSE_TEST} run --rm api pytest ${args} tests/ --ignore=tests/int + +# you can use keep_es=1 to avoid stopping elasticsearch after tests (useful during development) +test_api_integration: + @echo "🔎 Running API integration tests..." + ${DOCKER_COMPOSE_TEST} up -d es01 es02 elasticvue + ${DOCKER_COMPOSE_TEST} run --rm api pytest ${args} tests/ --ignore=tests/unit + test -z "${keep_es}" && ${DOCKER_COMPOSE_TEST} stop es01 es02 elasticvue || true -test_api: - @echo "🔎 Running API tests..." - ${DOCKER_COMPOSE_TEST} run --rm api pytest ${args} tests/ test_front: @echo "🔎 Running front-end tests..." @@ -125,6 +145,10 @@ test_front_watch: @echo "🔎 Running front-end tests..." ${DOCKER_COMPOSE_TEST} run --rm search_nodejs npm run test:watch +test_clean: + @echo "🔎 Cleaning tests instances..." 
+ ${DOCKER_COMPOSE_TEST} down -v + #-----------# # Utilities # #-----------# diff --git a/app/_import.py b/app/_import.py index 6330cc22..7789af50 100644 --- a/app/_import.py +++ b/app/_import.py @@ -12,13 +12,14 @@ from redis import Redis from app._types import FetcherResult, FetcherStatus, JSONType -from app.config import Config, IndexConfig, TaxonomyConfig +from app.config import Config, IndexConfig, TaxonomyConfig, settings from app.indexing import ( DocumentProcessor, generate_index_object, generate_taxonomy_index_object, ) -from app.taxonomy import get_taxonomy +from app.taxonomy import iter_taxonomies +from app.taxonomy_es import refresh_synonyms from app.utils import connection, get_logger, load_class_object_from_string from app.utils.io import jsonl_iter @@ -226,17 +227,17 @@ def gen_documents( next_index: str, num_items: int | None, num_processes: int, - process_id: int, + process_num: int, ): - """Generate documents to index for process number process_id + """Generate documents to index for process number process_num - We chunk documents based on document num % process_id + We chunk documents based on document num % process_num """ for i, row in enumerate(tqdm.tqdm(jsonl_iter(file_path))): if num_items is not None and i >= num_items: break # Only get the relevant - if i % num_processes != process_id: + if i % num_processes != process_num: continue document_dict = get_document_dict( @@ -260,26 +261,26 @@ def gen_taxonomy_documents( :param supported_langs: a set of supported languages :yield: a dict with the document to index, compatible with ES bulk API """ - for taxonomy_source_config in tqdm.tqdm(taxonomy_config.sources): - taxonomy = get_taxonomy( - taxonomy_source_config.name, str(taxonomy_source_config.url) - ) + for taxonomy_name, taxonomy in tqdm.tqdm(iter_taxonomies(taxonomy_config)): for node in taxonomy.iter_nodes(): - names = {} - for lang in supported_langs: - lang_names = set() - if lang in node.names: - lang_names.add(node.names[lang]) - if 
lang in node.synonyms: - lang_names |= set(node.synonyms[lang]) - names[lang] = list(lang_names) + names = { + lang: lang_names + for lang, lang_names in node.names.items() + if lang in supported_langs + } + synonyms = { + lang: lang_names + for lang, lang_names in node.synonyms.items() + if lang in supported_langs + } yield { "_index": next_index, "_source": { "id": node.id, - "taxonomy_name": taxonomy_source_config.name, - "names": names, + "taxonomy_name": taxonomy_name, + "name": names, + "synonyms": synonyms, }, } @@ -304,13 +305,22 @@ def update_alias(es_client: Elasticsearch, next_index: str, index_alias: str): ) +def get_alias(es_client: Elasticsearch, index_name: str): + """Get the current index pointed by the alias.""" + resp = es_client.indices.get_alias(name=index_name) + resp = list(resp.keys()) + if len(resp) == 0: + return None + return resp[0] + + def import_parallel( config: IndexConfig, file_path: Path, next_index: str, num_items: int | None, num_processes: int, - process_id: int, + process_num: int, ): """One task of import. @@ -318,12 +328,12 @@ def import_parallel( :param str next_index: the index to write to :param int num_items: max number of items to import, default to no limit :param int num_processes: total number of processes - :param int process_id: the index of the process + :param int process_num: the index of the process (from 0 to num_processes - 1) """ processor = DocumentProcessor(config) # open a connection for this process - es = connection.get_es_client(timeout=120, retry_on_timeout=True) + es = connection.get_es_client(request_timeout=120, retry_on_timeout=True) # Note that bulk works better than parallel bulk for our usecase. # The preprocessing in this file is non-trivial, so it's better to # parallelize that. 
If we then do parallel_bulk here, this causes queueing @@ -336,13 +346,11 @@ def import_parallel( next_index, num_items, num_processes, - process_id, + process_num, ), raise_on_error=False, ) - if not success: - logger.error("Encountered errors: %s", errors) - return success, errors + return process_num, success, errors def import_taxonomies(config: IndexConfig, next_index: str): @@ -353,8 +361,7 @@ def import_taxonomies(config: IndexConfig, next_index: str): :param config: the index configuration to use :param next_index: the index to write to """ - # open a connection for this process - es = connection.get_es_client(timeout=120, retry_on_timeout=True) + es = connection.current_es_client() # Note that bulk works better than parallel bulk for our usecase. # The preprocessing in this file is non-trivial, so it's better to # parallelize that. If we then do parallel_bulk @@ -480,7 +487,8 @@ def run_items_import( if True consider we don't have a full import, and directly updates items in current index. 
""" - es_client = connection.get_es_client() + # we need a large timeout as index creation can take a while because of synonyms + es_client = connection.get_es_client(request_timeout=600) if not partial: # we create a temporary index to import to # at the end we will change alias to point to it @@ -488,7 +496,7 @@ def run_items_import( next_index = f"{config.index.name}-{index_date}" index = generate_index_object(next_index, config) # create the index - index.save() + index.save(using=es_client) else: # use current index next_index = config.index.name @@ -509,12 +517,18 @@ def run_items_import( # run in parallel num_errors = 0 with Pool(num_processes) as pool: - for success, errors in pool.starmap(import_parallel, args): - if not success: + for i, success, errors in pool.starmap(import_parallel, args): + # Note: we log here instead of in sub-process because + # it's easier to avoid mixing logs, and it works better for pytest + logger.info("[%d] Indexed %d documents", i, success) + if errors: + logger.error("[%d] Encountered %d errors: %s", i, len(errors), errors) num_errors += len(errors) # update with last index updates (hopefully since the jsonl) if not skip_updates: num_errors += get_redis_updates(es_client, next_index, config) + # wait for index refresh + es_client.indices.refresh(index=next_index) if not partial: # make alias point to new index update_alias(es_client, next_index, config.index.name) @@ -537,11 +551,38 @@ def perform_taxonomy_import(config: IndexConfig) -> None: index.save() import_taxonomies(config, next_index) + # wait for index refresh + es_client.indices.refresh(index=next_index) # make alias point to new index update_alias(es_client, next_index, config.taxonomy.index.name) +def perform_cleanup_indexes(config: IndexConfig) -> int: + """Delete old indexes (that have no active alias on them).""" + removed = 0 + # some timeout for it can be long + es_client = connection.get_es_client(request_timeout=600) + prefixes = [config.index.name, 
config.taxonomy.index.name] + for prefix in prefixes: + # get all indexes + indexes = es_client.indices.get_alias(index=f"{prefix}-*") + # remove all index without alias + to_remove = [ + index for index, data in indexes.items() if not data.get("aliases") + ] + for index in to_remove: + logger.info("Deleting index %s", index) + es_client.indices.delete(index=index) + removed += 1 + return removed + + +def perform_refresh_synonyms(index_id: str, config: IndexConfig) -> None: + """Refresh synonyms files generated by taxonomies.""" + refresh_synonyms(index_id, config, settings.synonyms_path) + + def run_update_daemon(config: Config) -> None: """Run the update import daemon. diff --git a/app/_types.py b/app/_types.py index 7712c89c..04bc7dc5 100644 --- a/app/_types.py +++ b/app/_types.py @@ -1,12 +1,12 @@ -import textwrap -from enum import Enum +from enum import Enum, StrEnum from functools import cached_property -from typing import Annotated, Any, Literal, Optional, Tuple, Union, cast, get_type_hints +from inspect import cleandoc as cd_ +from typing import Annotated, Any, Literal, Optional, Tuple, Union, cast import elasticsearch_dsl.query import luqum.tree from fastapi import Query -from pydantic import BaseModel, ConfigDict, model_validator +from pydantic import BaseModel, ConfigDict, field_validator, model_validator from . 
import config from .utils import str_utils @@ -21,22 +21,22 @@ JSONType = dict[str, Any] -class DistributionChartType(BaseModel): +class DistributionChart(BaseModel): """Describes an entry for a distribution chart""" - chart_type: Literal["DistributionChartType"] = "DistributionChartType" + chart_type: Literal["DistributionChart"] = "DistributionChart" field: str -class ScatterChartType(BaseModel): +class ScatterChart(BaseModel): """Describes an entry for a scatter plot""" - chart_type: Literal["ScatterChartType"] = "ScatterChartType" + chart_type: Literal["ScatterChart"] = "ScatterChart" x: str y: str -ChartType = Union[DistributionChartType, ScatterChartType] +ChartType = Union[DistributionChart, ScatterChart] class FacetItem(BaseModel): @@ -73,8 +73,18 @@ class FacetInfo(BaseModel): """Data about selected filters for each facet: facet name -> list of values""" +class DebugInfo(StrEnum): + """Debug information to return in the API""" + + aggregations = "aggregations" + lucene_query = "lucene_query" + es_query = "es_query" + + class SearchResponseDebug(BaseModel): - query: JSONType + lucene_query: str | None = None + es_query: JSONType | None = None + aggregations: JSONType | None = None class SearchResponseError(BaseModel): @@ -98,7 +108,7 @@ class SuccessSearchResponse(BaseModel): page: int page_size: int page_count: int - debug: SearchResponseDebug + debug: SearchResponseDebug | None = None took: int timed_out: bool count: int @@ -126,12 +136,6 @@ class QueryAnalysis(BaseModel): es_query: Optional[elasticsearch_dsl.query.Query] = None """The query as an elasticsearch_dsl object""" - fulltext: Optional[str] = None - """The full text part of the query""" - - filter_query: Optional[JSONType] = None - """The filter part of the query""" - facets_filters: Optional[FacetsFilters] = None """The filters corresponding to the facets: a facet name and a list of values""" @@ -141,8 +145,6 @@ def clone(self, **kwargs): text_query=self.text_query, luqum_tree=self.luqum_tree, 
es_query=self.es_query, - fulltext=self.fulltext, - filter_query=self.filter_query, facets_filters=self.facets_filters, ) for k, v in kwargs.items(): @@ -155,112 +157,102 @@ def _dict_dump(self): "text_query": self.text_query, "luqum_tree": str(self.luqum_tree), "es_query": self.es_query.to_dict(), - "fulltext": self.fulltext, - "filter_query": self.filter_query, "facets_filters": self.facets_filters, } -INDEX_ID_QUERY_PARAM = Query( - description="""Index ID to use for the search, if not provided, the default index is used. - If there is only one index, this parameter is not needed.""" -) +class CommonParametersQuery: + """Documentation and constraints for some common query parameters""" + index_id = Query( + description=cd_( + """Index ID to use for the search, if not provided, the default index is used. + If there is only one index, this parameter is not needed. + """ + ) + ) -class SearchParameters(BaseModel): - """Common parameters for search""" + +class QuerySearchParameters(BaseModel): + """Parameters for search, + this class concentrates on parameters that define the search query + """ q: Annotated[ str | None, Query( - description="""The search query, it supports Lucene search query -syntax (https://lucene.apache.org/core/3_6_0/queryparsersyntax.html). Words -that are not recognized by the lucene query parser are searched as full text -search. + description=cd_( + """The search query, it supports Lucene search query + syntax (https://lucene.apache.org/core/3_6_0/queryparsersyntax.html). Words + that are not recognized by the lucene query parser are searched as full text + search. -Example: `categories_tags:"en:beverages" strawberry brands:"casino"` query use a -filter clause for categories and brands and look for "strawberry" in multiple -fields. + Example: `categories_tags:"en:beverages" strawberry brands:"casino"` query use a + filter clause for categories and brands and look for "strawberry" in multiple + fields. 
-The query is optional, but `sort_by` value must then be provided.""" + The query is optional, but `sort_by` value must then be provided. + """ + ) ), ] = None - langs: Annotated[ - list[str], + boost_phrase: Annotated[ + bool, Query( - description="""List of languages we want to support during search. -This list should include the user expected language, and additional languages (such -as english for example). + description=cd_( + """This enables an heuristic that will favor, + matching terms that are consecutive. -This is currently used for language-specific subfields to choose in which -subfields we're searching in. + Technically, if you have a query with the two words `whole milk` + it will boost entries with `"whole milk"` exact match. + The boost factor is defined by `match_phrase_boost` value in Configuration -If not provided, `['en']` is used.""" - ), - ] = ["en"] - page_size: Annotated[ - int, Query(description="Number of results to return per page.") - ] = 10 - page: Annotated[int, Query(ge=1, description="Page to request, starts at 1.")] = 1 - fields: Annotated[ - list[str] | None, - Query( - description="List of fields to include in the response. All other fields will be ignored." - ), - ] = None - sort_by: Annotated[ - str | None, - Query( - description=textwrap.dedent( + Note, that it only make sense if you use best match sorting. + So in any other case it is ignored. """ - Field name to use to sort results, the field should exist - and be sortable. If it is not provided, results are sorted by descending relevance score. - - If you put a minus before the name, the results will be sorted by descending order. - - If the field name match a known script (defined in your configuration), - it will be use for sorting. - - In this case you also need to provide additional parameters corresponding to your script parameters. - If a script needs parameters, you can only use the POST method. 
+ ) + ), + ] = False - Beware that this may have a big [impact on performance][perf_link] + langs: Annotated[ + list[str], + Query( + description=cd_( + """List of languages we want to support during search. + This list should include the user expected language, and additional languages (such + as english for example). - Also bare in mind [privacy considerations][privacy_link] if your script parameters contains sensible data. + This is currently used for language-specific subfields to choose in which + subfields we're searching in. - [perf_link]: https://openfoodfacts.github.io/search-a-licious/users/how-to-use-scripts/#performance-considerations - [privacy_link]: https://openfoodfacts.github.io/search-a-licious/users/how-to-use-scripts/#performance-considerations - """ + If not provided, `['en']` is used. + """ ) ), - ] = None - facets: Annotated[ - list[str] | None, - Query( - description="""Name of facets to return in the response as a comma-separated value. - If None (default) no facets are returned.""" - ), - ] = None - charts: Annotated[ - list[ChartType] | None, - Query( - description="""Name of vega representations to return in the response. - Can be distribution chart or scatter plot""" - ), - ] = None - sort_params: Annotated[ - JSONType | None, - Query( - description="""Additional parameters when using a sort script in sort_by. 
- If the sort script needs parameters, you can only be used the POST method.""", - ), - ] = None + ] = ["en"] + index_id: Annotated[ str | None, - INDEX_ID_QUERY_PARAM, + CommonParametersQuery.index_id, ] = None + @field_validator("langs", mode="before") + @classmethod + def parse_langs_str(cls, langs: str | list[str]) -> list[str]: + """ + Parse for get params 'langs' + """ + value_str = _prepare_str_list(langs) + if value_str: + langs = value_str.split(",") + else: + # we already know because of code logic that langs is the right type + # but we need to cast for mypy type checking + langs = cast(list[str], langs) + + return langs + @model_validator(mode="after") def validate_index_id(self): """ @@ -270,8 +262,7 @@ def validate_index_id(self): because we want to be able to substitute the default None value, by the default index """ - config.check_config_is_defined() - global_config = cast(config.Config, config.CONFIG) + global_config = config.get_config() check_index_id_is_defined(self.index_id, global_config) self.index_id, _ = global_config.get_index_config(self.index_id) return self @@ -287,25 +278,74 @@ def valid_index_id(self) -> str: return self.index_id @model_validator(mode="after") - def validate_q_or_sort_by(self): - """We want at least one of q or sort_by before launching a request""" - if self.q is None and self.sort_by is None: - raise ValueError("`sort_by` must be provided when `q` is missing") + def check_max_results(self): + """Check we don't ask too many results at once""" + if self.page * self.page_size > 10_000: + raise ValueError( + f"Maximum number of returned results is 10 000 (here: page * page_size = {self.page * self.page_size})", + ) return self @cached_property def index_config(self): """Get the index config once and for all""" - global_config = cast(config.Config, config.CONFIG) + global_config = config.get_config() _, index_config = global_config.get_index_config(self.index_id) return index_config @cached_property - def 
uses_sort_script(self): - """Does sort_by use a script?""" - index_config = self.index_config - _, sort_by = self.sign_sort_by - return sort_by in index_config.scripts.keys() + def langs_set(self): + return set(self.langs) + + @cached_property + def main_lang(self): + """Get the main lang of the query""" + return self.langs[0] if self.langs else "en" + + +class ResultSearchParameters(BaseModel): + """Parameters that influence results presentation: pagination and sorting""" + + fields: Annotated[ + list[str] | None, + Query( + description="List of fields to include in the response. All other fields will be ignored." + ), + ] = None + + page_size: Annotated[ + int, Query(description="Number of results to return per page.") + ] = 10 + + page: Annotated[int, Query(description="Number of results to return per page.")] = 1 + + sort_by: Annotated[ + str | None, + Query( + description=cd_( + """Field name to use to sort results, + the field should exist and be sortable. + If it is not provided, results are sorted by descending relevance score. + (aka best match) + + If you put a minus before the name, the results will be sorted by descending order. + + If the field name match a known script (defined in your configuration), + it will be use for sorting. + + In this case you also need to provide additional parameters corresponding to your script parameters. + If a script needs parameters, you can only use the POST method. + + Beware that this may have a big [impact on performance][perf_link] + + Also bare in mind [privacy considerations][privacy_link] if your script parameters contains sensible data. 
+ + [perf_link]: https://openfoodfacts.github.io/search-a-licious/users/how-to-use-scripts/#performance-considerations + [privacy_link]: https://openfoodfacts.github.io/search-a-licious/users/how-to-use-scripts/#performance-considerations + """ + ) + ), + ] = None @model_validator(mode="after") def sort_by_is_field_or_script(self): @@ -315,34 +355,45 @@ def sort_by_is_field_or_script(self): is_field = sort_by in index_config.fields # TODO: verify field type is compatible with sorting if not (self.sort_by is None or is_field or self.uses_sort_script): - raise ValueError("`sort_by` must be a valid field name or script name") + raise ValueError( + "`sort_by` must be a valid field name or script name or None" + ) return self - @model_validator(mode="after") - def sort_by_scripts_needs_params(self): - """If sort_by is a script, - verify we got corresponding parameters in sort_params - """ - if self.uses_sort_script: - if self.sort_params is None: - raise ValueError( - "`sort_params` must be provided when using a sort script" - ) - if not isinstance(self.sort_params, dict): - raise ValueError("`sort_params` must be a dict") - # verifies keys are those expected - request_keys = set(self.sort_params.keys()) - sort_sign, sort_by = self.sign_sort_by - expected_keys = set(self.index_config.scripts[sort_by].params.keys()) - if request_keys != expected_keys: - missing = expected_keys - request_keys - missing_str = ("missing keys: " + ", ".join(missing)) if missing else "" - new = request_keys - expected_keys - new_str = ("unexpected keys: " + ", ".join(new)) if new else "" - raise ValueError( - f"sort_params keys must match expected keys. 
{missing_str} {new_str}" - ) - return self + @cached_property + def uses_sort_script(self): + """Does sort_by use a script?""" + index_config = self.index_config + _, sort_by = self.sign_sort_by + return index_config.scripts and sort_by in index_config.scripts.keys() + + +class AggregateSearchParameters(BaseModel): + + facets: Annotated[ + list[str] | None, + Query( + description=cd_( + """Name of facets to return in the response as a comma-separated value. + If None (default) no facets are returned. + """ + ) + ), + ] = None + + charts: Annotated[ + list[ChartType] | None, + Query( + description=cd_( + """Name of vega representations to return in the response. + Can be distribution chart or scatter plot. + + If you pass a simple string, it will be interpreted as a distribution chart, + or a scatter plot if it is two fields separated by a column (x_axis_field:y_axis_field). + """ + ) + ), + ] = None @model_validator(mode="after") def check_facets_are_valid(self): @@ -357,28 +408,23 @@ def check_charts_are_valid(self): """Check that the graph names are valid.""" if self.charts is None: return self - errors = check_all_values_are_fields_agg( self.index_id, [ chart.field for chart in self.charts - if chart.chart_type == "DistributionChartType" + if chart.chart_type == "DistributionChart" ], ) errors.extend( check_fields_are_numeric( self.index_id, - [ - chart.x - for chart in self.charts - if chart.chart_type == "ScatterChartType" - ] + [chart.x for chart in self.charts if chart.chart_type == "ScatterChart"] + [ chart.y for chart in self.charts - if chart.chart_type == "ScatterChartType" + if chart.chart_type == "ScatterChart" ], ) ) @@ -387,20 +433,7 @@ def check_charts_are_valid(self): raise ValueError(errors) return self - @model_validator(mode="after") - def check_max_results(self): - """Check we don't ask too many results at once""" - if self.page * self.page_size > 10_000: - raise ValueError( - f"Maximum number of returned results is 10 000 (here: page * 
page_size = {self.page * self.page_size})", - ) - return self - - @property - def langs_set(self): - return set(self.langs) - - @property + @cached_property def sign_sort_by(self) -> Tuple[str_utils.BoolOperator, str | None]: return ( ("+", None) @@ -408,31 +441,150 @@ def sign_sort_by(self) -> Tuple[str_utils.BoolOperator, str | None]: else str_utils.split_sort_by_sign(self.sort_by) ) - @property - def main_lang(self): - """Get the main lang of the query""" - return self.langs[0] if self.langs else "en" +def _prepare_str_list(item: Any) -> str | None: + if isinstance(item, str): + return item + elif isinstance(item, list) and all(isinstance(x, str) for x in item): + return ",".join(item) + return None + + +class SearchParameters( + QuerySearchParameters, ResultSearchParameters, AggregateSearchParameters +): + """Parameters for search, common to GET and POST""" + + # forbid extra parameters to prevent failed expectations because of typos + model_config = {"extra": "forbid"} + + debug_info: Annotated[ + list[DebugInfo] | None, + Query( + description=cd_( + """Tells which debug information to return in the response. 
+ It can be a comma separated list of values + """ + ), + ), + ] = None + + @field_validator("debug_info", mode="before") + @classmethod + def debug_info_list_from_str( + cls, debug_info: str | list[str] | list[DebugInfo] | None + ) -> list[DebugInfo] | None: + """We can pass a comma separated list of DebugInfo values as a string""" + # as we are a before validator, we get a list + str_infos = _prepare_str_list(debug_info) + if str_infos: + values = [getattr(DebugInfo, part, None) for part in str_infos.split(",")] + debug_info = [v for v in values if v is not None] + if debug_info is not None: + # we already know because of code logic that debug_info is the right type + # but we need to cast for mypy type checking + debug_info = cast(list[DebugInfo], debug_info) + return debug_info + + +class GetSearchParameters(SearchParameters): + """GET parameters for search""" + + @field_validator("charts", mode="before") + @classmethod + def parse_charts_str( + cls, charts: str | list[str] | list[ChartType] | None + ) -> list[ChartType] | None: + """ + Parse for get params are 'field' or 'xfield:yfield' + separated by ',' for Distribution and Scatter charts. 
+ + Directly the dictionnaries in POST request + """ + str_charts = _prepare_str_list(charts) + if str_charts: + charts = [] + charts_list = str_charts.split(",") + for c in charts_list: + if ":" in c: + [x, y] = c.split(":") + charts.append(ScatterChart(x=x, y=y)) + else: + charts.append(DistributionChart(field=c)) + if charts is not None: + # we already know because of code logic that charts is the right type + # but we need to cast for mypy type checking + charts = cast(list[ChartType], charts) + return charts + + @model_validator(mode="after") + def validate_q_or_sort_by(self): + """We want at least one of q or sort_by before launching a request""" + if self.q is None and self.sort_by is None: + raise ValueError("`sort_by` must be provided when `q` is missing") + return self -def _annotation_new_type(type_, annotation): - """Use a new type for a given annotation""" - return Annotated[type_, *annotation.__metadata__] + @field_validator("facets", "fields", mode="before") + @classmethod + def parse_value_str(cls, value: str | list[str] | None) -> list[str] | None: + """ + Parse for get params 'langs' + """ + value_str = _prepare_str_list(value) + if value_str: + value = value_str.split(",") + if value is not None: + # we already know because of code logic that value is the right type + # but we need to cast for mypy type checking + value = cast(list[str], value) + return value + + @model_validator(mode="after") + def no_sort_by_scripts_on_get(self): + if self.uses_sort_script: + raise ValueError("`sort_by` must not be a script when using GET") + return self -# types for search parameters for GET -SEARCH_PARAMS_ANN = get_type_hints(SearchParameters, include_extras=True) +class PostSearchParameters(SearchParameters): + """POST parameters for search""" + sort_params: Annotated[ + JSONType | None, + Query( + description=cd_( + """Additional parameters when using a sort script in sort_by. + If the sort script needs parameters, you can only be used the POST method. 
+ """ + ), + ), + ] = None -class GetSearchParamsTypes: - q = SEARCH_PARAMS_ANN["q"] - langs = _annotation_new_type(str, SEARCH_PARAMS_ANN["langs"]) - page_size = SEARCH_PARAMS_ANN["page_size"] - page = SEARCH_PARAMS_ANN["page"] - fields = _annotation_new_type(str, SEARCH_PARAMS_ANN["fields"]) - sort_by = SEARCH_PARAMS_ANN["sort_by"] - facets = _annotation_new_type(str, SEARCH_PARAMS_ANN["facets"]) - charts = _annotation_new_type(str, SEARCH_PARAMS_ANN["charts"]) - index_id = SEARCH_PARAMS_ANN["index_id"] + @model_validator(mode="after") + def sort_by_scripts_needs_params(self): + """If sort_by is a script, + verify we got corresponding parameters in sort_params + """ + if self.uses_sort_script: + if self.sort_params is None: + raise ValueError( + "`sort_params` must be provided when using a sort script" + ) + if not isinstance(self.sort_params, dict): + raise ValueError("`sort_params` must be a dict") + # verifies keys are those expected + request_keys = set(self.sort_params.keys()) + sort_sign, sort_by = self.sign_sort_by + expected_keys = set(self.index_config.scripts[sort_by].params.keys()) + if request_keys != expected_keys: + missing = expected_keys - request_keys + missing_str = ("missing keys: " + ", ".join(missing)) if missing else "" + new = request_keys - expected_keys + new_str = ("unexpected keys: " + ", ".join(new)) if new else "" + raise ValueError( + f"sort_params keys must match expected keys. 
{missing_str} {new_str}" + ) + return self class FetcherStatus(Enum): @@ -453,7 +605,11 @@ class FetcherStatus(Enum): class FetcherResult(BaseModel): - """Result for a document fecher""" + """Result for a document fetcher + + This is also used by pre-processors + who have the opportunity to discard an entry + """ status: FetcherStatus document: JSONType | None diff --git a/app/api.py b/app/api.py index bb11eafe..6f6fcc47 100644 --- a/app/api.py +++ b/app/api.py @@ -10,20 +10,17 @@ from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import HTMLResponse, PlainTextResponse, RedirectResponse from fastapi.templating import Jinja2Templates -from pydantic import ValidationError import app.search as app_search from app import config from app._types import ( - INDEX_ID_QUERY_PARAM, - DistributionChartType, - GetSearchParamsTypes, - ScatterChartType, - SearchParameters, + CommonParametersQuery, + GetSearchParameters, + PostSearchParameters, SearchResponse, SuccessSearchResponse, ) -from app.config import check_config_is_defined, settings +from app.config import settings from app.postprocessing import process_taxonomy_completion_response from app.query import build_completion_query from app.utils import connection, get_logger, init_sentry @@ -80,11 +77,11 @@ def check_index_id_is_defined_or_400(index_id: str | None, config: config.Config @app.get("/document/{identifier}") def get_document( - identifier: str, index_id: Annotated[str | None, INDEX_ID_QUERY_PARAM] = None + identifier: str, + index_id: Annotated[str | None, CommonParametersQuery.index_id] = None, ): """Fetch a document from Elasticsearch with specific ID.""" - check_config_is_defined() - global_config = cast(config.Config, config.CONFIG) + global_config = config.get_config() check_index_id_is_defined_or_400(index_id, global_config) index_id, index_config = global_config.get_index_config(index_id) @@ -104,71 +101,40 @@ def get_document( return product +def status_for_response(result: 
SearchResponse): + if isinstance(result, SuccessSearchResponse): + return status.HTTP_200_OK + else: + # TODO: should we refine that ? + return status.HTTP_500_INTERNAL_SERVER_ERROR + + @app.post("/search") -def search(search_parameters: Annotated[SearchParameters, Body()]): +def search( + response: Response, search_parameters: Annotated[PostSearchParameters, Body()] +): """This is the main search endpoint. It uses POST request to ensure privacy. Under the hood, it calls the :py:func:`app.search.search` function """ - return app_search.search(search_parameters) - - -def parse_charts_get(charts_params: str): - """ - Parse for get params are 'field' or 'xfield:yfield' - separated by ',' for Distribution and Scatter charts. - - Directly the dictionnaries in POST request - """ - charts = [] - for c in charts_params.split(","): - if ":" in c: - [x, y] = c.split(":") - charts.append(ScatterChartType(x=x, y=y)) - else: - charts.append(DistributionChartType(field=c)) - return charts + result = app_search.search(search_parameters) + response.status_code = status_for_response(result) + return result @app.get("/search") def search_get( - q: GetSearchParamsTypes.q = None, - langs: GetSearchParamsTypes.langs = None, - page_size: GetSearchParamsTypes.page_size = 10, - page: GetSearchParamsTypes.page = 1, - fields: GetSearchParamsTypes.fields = None, - sort_by: GetSearchParamsTypes.sort_by = None, - facets: GetSearchParamsTypes.facets = None, - charts: GetSearchParamsTypes.charts = None, - index_id: GetSearchParamsTypes.index_id = None, + response: Response, search_parameters: Annotated[GetSearchParameters, Query()] ) -> SearchResponse: """This is the main search endpoint when using GET request Under the hood, it calls the :py:func:`app.search.search` function """ - # str to lists - langs_list = langs.split(",") if langs else ["en"] - fields_list = fields.split(",") if fields else None - facets_list = facets.split(",") if facets else None - charts_list = parse_charts_get(charts) 
if charts else None - # create SearchParameters object - try: - search_parameters = SearchParameters( - q=q, - langs=langs_list, - page_size=page_size, - page=page, - fields=fields_list, - sort_by=sort_by, - facets=facets_list, - index_id=index_id, - charts=charts_list, - ) - return app_search.search(search_parameters) - except ValidationError as e: - raise HTTPException(status_code=400, detail=str(e)) + result = app_search.search(search_parameters) + response.status_code = status_for_response(result) + return result @app.get("/autocomplete") @@ -191,11 +157,10 @@ def taxonomy_autocomplete( int | None, Query(description="Fuzziness level to use, default to no fuzziness."), ] = None, - index_id: Annotated[str | None, INDEX_ID_QUERY_PARAM] = None, + index_id: Annotated[str | None, CommonParametersQuery.index_id] = None, ): """API endpoint for autocompletion using taxonomies""" - check_config_is_defined() - global_config = cast(config.Config, config.CONFIG) + global_config = config.get_config() check_index_id_is_defined_or_400(index_id, global_config) index_id, index_config = global_config.get_index_config(index_id) taxonomy_names_list = taxonomy_names.split(",") @@ -239,7 +204,7 @@ def html_search( page_size: int = 24, langs: str = "fr,en", sort_by: str | None = None, - index_id: Annotated[str | None, INDEX_ID_QUERY_PARAM] = None, + index_id: Annotated[str | None, CommonParametersQuery.index_id] = None, # Display debug information in the HTML response display_debug: bool = False, ): diff --git a/app/charts.py b/app/charts.py index 03efff66..39b3f46b 100644 --- a/app/charts.py +++ b/app/charts.py @@ -4,8 +4,8 @@ from ._types import ( ChartsInfos, ChartType, - DistributionChartType, - ScatterChartType, + DistributionChart, + ScatterChart, SuccessSearchResponse, ) @@ -44,7 +44,7 @@ def empty_chart(chart_name): def build_distribution_chart( - chart: DistributionChartType, values, index_config: config.IndexConfig + chart: DistributionChart, values, index_config: 
config.IndexConfig ): """ Return the vega structure for a Bar Chart @@ -139,7 +139,7 @@ def build_distribution_chart( def build_scatter_chart( - chart_option: ScatterChartType, search_result, index_config: config.IndexConfig + chart_option: ScatterChart, search_result, index_config: config.IndexConfig ): """ Build a scatter plot only for values from search_results @@ -242,7 +242,7 @@ def build_charts( aggregations = search_result.aggregations for requested_chart in requested_charts: - if requested_chart.chart_type == "ScatterChartType": + if requested_chart.chart_type == "ScatterChart": charts[f"{requested_chart.x}:{requested_chart.y}"] = build_scatter_chart( requested_chart, search_result, index_config ) diff --git a/app/cli/main.py b/app/cli/main.py index c3f001ed..e100c504 100644 --- a/app/cli/main.py +++ b/app/cli/main.py @@ -24,16 +24,14 @@ def _get_index_config( config_path: Optional[Path], index_id: Optional[str] ) -> tuple[str, "app.config.IndexConfig"]: - from typing import cast from app import config - from app.config import check_config_is_defined, set_global_config + from app.config import set_global_config if config_path: set_global_config(config_path) - check_config_is_defined() - global_config = cast(config.Config, config.CONFIG) + global_config = config.get_config() index_id, index_config = global_config.get_index_config(index_id) if index_config is None: raise typer.BadParameter( @@ -102,7 +100,6 @@ def import_data( start_time = time.perf_counter() index_id, index_config = _get_index_config(config_path, index_id) - num_errors = run_items_import( input_path, num_processes, @@ -131,24 +128,53 @@ def import_taxonomies( default=None, help=INDEX_ID_HELP, ), + skip_indexing: bool = typer.Option( + default=False, + help="Skip putting taxonomies in the ES index", + ), + skip_synonyms: bool = typer.Option( + default=False, + help="Skip creating synonyms files for ES analyzers", + ), ): """Import taxonomies into Elasticsearch. 
- It get taxonomies json files as specified in the configuration file. + It download taxonomies json files as specified in the configuration file. + + It creates taxonomies indexes (for auto-completion). + + It creates synonyms files for ElasticSearch analyzers + (enabling full text search to benefits from synonyms). """ import time - from app._import import perform_taxonomy_import - from app.utils import get_logger + from app._import import perform_refresh_synonyms, perform_taxonomy_import + from app.utils import connection, get_logger logger = get_logger() index_id, index_config = _get_index_config(config_path, index_id) - start_time = time.perf_counter() - perform_taxonomy_import(index_config) - end_time = time.perf_counter() - logger.info("Import time: %s seconds", end_time - start_time) + # open a connection for this process + connection.get_es_client(request_timeout=120, retry_on_timeout=True) + + if skip_indexing: + logger.info("Skipping indexing of taxonomies") + else: + start_time = time.perf_counter() + perform_taxonomy_import(index_config) + end_time = time.perf_counter() + logger.info("Import time: %s seconds", end_time - start_time) + if skip_synonyms: + logger.info("Skipping synonyms generation") + else: + start_time = time.perf_counter() + perform_refresh_synonyms( + index_id, + index_config, + ) + end_time = time.perf_counter() + logger.info("Synonyms generation time: %s seconds", end_time - start_time) @cli.command() @@ -181,6 +207,50 @@ def sync_scripts( ) +@cli.command() +def cleanup_indexes( + config_path: Optional[Path] = typer.Option( + default=None, + help="path of the yaml configuration file, it overrides CONFIG_PATH envvar", + dir_okay=False, + file_okay=True, + exists=True, + ), + index_id: Optional[str] = typer.Option( + default=None, + help=f"{INDEX_ID_HELP}\nIf not specified, all indexes are cleaned", + ), +): + """Clean old indexes that are not active anymore (no aliases) + + As you do full import of data or update taxonomies, + old 
indexes are not removed automatically. + (in the case you want to roll back or compare). + + This command will remove all indexes that are not active anymore. + """ + import time + + from app._import import perform_cleanup_indexes + from app.utils import get_logger + + logger = get_logger() + if index_id: + _, index_config = _get_index_config(config_path, index_id) + index_configs = [index_config] + else: + _get_index_config(config_path, None) # just to set global config variable + from app.config import get_config + + index_configs = list(get_config().indices.values()) + start_time = time.perf_counter() + removed = 0 + for index_config in index_configs: + removed += perform_cleanup_indexes(index_config) + end_time = time.perf_counter() + logger.info("Removed %d indexes in %s seconds", removed, end_time - start_time) + + @cli.command() def run_update_daemon( config_path: Optional[Path] = typer.Option( @@ -199,11 +269,10 @@ def run_update_daemon( It is optional but enables having an always up-to-date index, for applications where data changes. 
""" - from typing import cast from app import config from app._import import run_update_daemon - from app.config import check_config_is_defined, set_global_config, settings + from app.config import set_global_config, settings from app.utils import get_logger, init_sentry # Create root logger @@ -214,8 +283,7 @@ def run_update_daemon( if config_path: set_global_config(config_path) - check_config_is_defined() - global_config = cast(config.Config, config.CONFIG) + global_config = config.get_config() run_update_daemon(global_config) diff --git a/app/config.py b/app/config.py index 2be9d545..f820099b 100644 --- a/app/config.py +++ b/app/config.py @@ -1,3 +1,4 @@ +import functools import logging from enum import StrEnum, auto from inspect import cleandoc as cd_ @@ -5,7 +6,14 @@ from typing import Annotated, Any import yaml -from pydantic import BaseModel, Field, HttpUrl, field_validator, model_validator +from pydantic import ( + BaseModel, + Field, + FileUrl, + HttpUrl, + field_validator, + model_validator, +) from pydantic.json_schema import GenerateJsonSchema from pydantic_settings import BaseSettings @@ -121,6 +129,12 @@ class Settings(BaseSettings): description="User-Agent used when fetching resources (taxonomies) or documents" ), ] = "search-a-licious" + synonyms_path: Annotated[ + Path, + Field( + description="Path of the directory that will contain synonyms for ElasticSearch instances" + ), + ] = Path("/opt/search/synonyms") settings = Settings() @@ -199,7 +213,7 @@ class TaxonomySourceConfig(BaseModel): ), ] url: Annotated[ - HttpUrl, + FileUrl | HttpUrl, Field( description=cd_( """URL of the taxonomy. @@ -238,6 +252,7 @@ class FieldType(StrEnum): Tokenization will use analyzers specific to each languages. 
* taxonomy: a field akin to keyword but with support for matching using taxonomy synonyms and translations + (and in fact also a text mapping possibility) * disabled: a field that is not stored nor searchable (see [Elasticsearch help]) * object: this field contains a dict with sub-fields. @@ -340,7 +355,7 @@ class FieldConfig(BaseModel): It is used to return a 'faceted-view' with the number of results for each facet value, or to generate bar charts. - Only valid for keyword or numeric field types. + Only valid for keyword, taxonomy or numeric field types. """ ) ), @@ -356,23 +371,14 @@ class FieldConfig(BaseModel): ) ), ] = None - add_taxonomy_synonyms: Annotated[ - bool, - Field( - description=cd_( - """if True, add all synonyms of the taxonomy values to the index. - The flag is ignored if the field type is not `taxonomy`. - """ - ) - ), - ] = True @model_validator(mode="after") def bucket_agg_should_be_used_for_keyword_and_numeric_types_only(self): """Validator that checks that `bucket_agg` is only provided for fields with types `keyword`, `double`, `float`, `integer` or `bool`.""" if self.bucket_agg and not ( - self.type.is_numeric() or self.type in (FieldType.keyword, FieldType.bool) + self.type.is_numeric() + or self.type in (FieldType.keyword, FieldType.bool, FieldType.taxonomy) ): raise ValueError( "bucket_agg should be provided for taxonomy or numeric type only" @@ -484,35 +490,22 @@ class TaxonomyConfig(BaseModel): """Configuration of taxonomies, that is collections of entries with synonyms in multiple languages. + See [Explain taxonomies](../explain-taxonomies) + Field may be linked to taxonomies. It enables enriching search with synonyms, as well as providing suggestions, or informative facets. 
+ + Note: if you define taxonomies, you must import them using + [import-taxonomies command](../ref-python/cli.html#python3-m-app-import-taxonomies) """ sources: Annotated[ list[TaxonomySourceConfig], Field(description="Configurations of taxonomies that this project will use."), ] - exported_langs: Annotated[ - list[str], - Field( - description=cd_( - """a list of languages for which - we want taxonomized fields to be always exported during indexing. - - During indexing, we use the taxonomy to translate every taxonomized field - in a language-specific subfield. - - The list of language depends on the value defined here and on the optional - `taxonomy_langs` field that can be defined in each document. - - Beware that providing many language might inflate the index size. - """, - ) - ), - ] index: Annotated[ TaxonomyIndexConfig, Field(description=TaxonomyIndexConfig.__doc__), @@ -695,13 +688,38 @@ class IndexConfig(BaseModel): float, Field( description=cd_( - """How much we boost exact matches on individual fields + """How much we boost exact matches on consecutive words - This only makes sense when using "best match" order. + That is, if you search "Dark Chocolate", + it will boost entries that have the "Dark Chocolate" phrase (in the same field). + + It only applies to free text search. + + This only makes sense when using + "boost_phrase" request parameters and "best match" order. + + Note: this field accept float of string, + because using float might generate rounding problems. + The string must represent a float. """ ) ), ] = 2.0 + match_phrase_boost_proximity: Annotated[ + int | None, + Field( + description=cd_( + """How much we allow proximity for `match_phrase_boost`. + + If unspecified we will just match word to word. + Otherwise it will allow some gap between words matching + + This only makes sense when using + "boost_phrase" request parameters and "best match" order. 
+ """ + ) + ), + ] = None document_denylist: Annotated[ set[str], Field( @@ -764,6 +782,15 @@ def field_references_must_exist_and_be_valid(self): return self + @field_validator("fields") + @classmethod + def ensure_no_fields_use_reserved_name(cls, fields: dict[str, FieldConfig]): + """Verify that no field name clashes with a reserved name""" + used_reserved = set(["last_indexed_datetime", "_id"]) & set(fields.keys()) + if used_reserved: + raise ValueError(f"The field names {','.join(used_reserved)} are reserved") + return fields + @field_validator("fields") @classmethod def add_field_name_to_each_field(cls, fields: dict[str, FieldConfig]): @@ -772,31 +799,42 @@ def add_field_name_to_each_field(cls, fields: dict[str, FieldConfig]): field_item.name = field_name return fields - def get_supported_langs(self) -> set[str]: - """Return the set of supported languages for `text_lang` fields. - - It's used to know which language-specific subfields to create. - """ - return ( - set(self.supported_langs or []) - # only keep langs for which a built-in analyzer built-in, other - # langs will be stored in a unique `other` subfield - ) & set(ANALYZER_LANG_MAPPING) - - def get_taxonomy_langs(self) -> set[str]: - """Return the set of exported languages for `taxonomy` fields. - - It's used to know which language-specific subfields to create. 
- """ - # only keep langs for which a built-in analyzer built-in, other - # langs will be stored in a unique `other` subfield - return (set(self.taxonomy.exported_langs)) & set(ANALYZER_LANG_MAPPING) - def get_fields_with_bucket_agg(self): return [ field_name for field_name, field in self.fields.items() if field.bucket_agg ] + @functools.cached_property + def text_lang_fields(self) -> dict[str, FieldConfig]: + """List all text_lang fields in an efficient way""" + return { + field_name: field + for field_name, field in self.fields.items() + if field.type == FieldType.text_lang + } + + @functools.cached_property + def supported_langs_set(self): + return frozenset(self.supported_langs) + + @functools.cached_property + def lang_fields(self) -> dict[str, FieldConfig]: + """Fully qualified name of fields that are translated""" + return { + fname: field + for fname, field in self.fields.items() + if field.type in ["taxonomy", "text_lang"] + } + + @functools.cached_property + def full_text_fields(self) -> dict[str, FieldConfig]: + """Fully qualified name of fields that are part of default full text search""" + return { + fname: field + for fname, field in self.fields.items() + if field.full_text_search + } + CONFIG_DESCRIPTION_INDICES = """ A Search-a-licious instance only have one configuration file, @@ -870,25 +908,33 @@ def from_yaml(cls, path: Path) -> "Config": return cls(**data) -# CONFIG is a global variable that contains the search-a-licious configuration +# _CONFIG is a global variable that contains the search-a-licious configuration # used. It is specified by the envvar CONFIG_PATH. -CONFIG: Config | None = None -if settings.config_path: - if not settings.config_path.is_file(): - raise RuntimeError(f"config file does not exist: {settings.config_path}") +# use get_config() to access it. 
+_CONFIG: Config | None = None - CONFIG = Config.from_yaml(settings.config_path) +def get_config() -> Config: + """Return the object containing global configuration -def check_config_is_defined(): - """Raise a RuntimeError if the Config path is not set.""" - if CONFIG is None: + It raises if configuration was not yet set + """ + if _CONFIG is None: raise RuntimeError( "No configuration is configured, set envvar " "CONFIG_PATH with the path of the yaml configuration file" ) + return _CONFIG def set_global_config(config_path: Path): - global CONFIG - CONFIG = Config.from_yaml(config_path) + global _CONFIG + _CONFIG = Config.from_yaml(config_path) + return _CONFIG + + +if settings.config_path: + if not settings.config_path.is_file(): + raise RuntimeError(f"config file does not exist: {settings.config_path}") + + set_global_config(settings.config_path) diff --git a/app/es_query_builder.py b/app/es_query_builder.py new file mode 100644 index 00000000..8ce9a071 --- /dev/null +++ b/app/es_query_builder.py @@ -0,0 +1,187 @@ +"""This module creates a specific ESQueryBuilder, +that will be able to handle the full text search correctly +""" + +import luqum +from luqum.elasticsearch.tree import EPhrase, EWord +from luqum.elasticsearch.visitor import ElasticsearchQueryBuilder + +from ._types import JSONType +from .config import IndexConfig +from .exceptions import FreeWildCardError, QueryAnalysisError + +DEFAULT_FIELD_MARKER = "_searchalicious_text" + + +class FullTextMixin: + """Implementation of json query transformation to use a query + on all needed field according to full_text_search configurations + """ + + # attributes provided in implementations + index_config: IndexConfig + query_langs: list[str] + + MATCH_TO_MULTI_MATCH_TYPE = { + "match": "best_fields", + "match_phrase": "phrase", + } + + @property + def full_text_query_fields(self) -> list[str]: + """List fields to match upon in full text search""" + fields = [] + lang_fields = set(self.index_config.lang_fields) + 
supported_langs = set(self.index_config.supported_langs) & set(self.query_langs) + + for fname, field in self.index_config.full_text_fields.items(): + if fname in lang_fields: + for lang in supported_langs: + subfield_name = f"{field.name}.{lang}" + fields.append(subfield_name) + else: + fields.append(field.name) + return fields + + def _transform_query(self, query): + """Transform the query generated by luqum transformer + to a query on all necessary fields. + """ + fields = self.full_text_query_fields + if "exists" in query: + raise FreeWildCardError( + "Free wildcards are not allowed in full text queries" + ) + if "query_string" in query: + # no need to transform, just add fields + query["query_string"]["fields"] = fields + elif "match" in query or "match_phrase" in query: + query_type = list(k for k in query.keys() if k.startswith("match"))[0] + # go for multi_match + inner_json = query["multi_match"] = query.pop(query_type) + inner_json.update(inner_json.pop(self.field)) + inner_json["fields"] = fields + inner_json["type"] = self.MATCH_TO_MULTI_MATCH_TYPE[query_type] + else: + raise QueryAnalysisError( + f"Unexpected query type while analyzing full text query: {query.keys()}" + ) + return query + + +class EFullTextWord(EWord, FullTextMixin): + """Item that may generates a multi_match for word on default field""" + + def __init__(self, index_config: IndexConfig, query_langs=list[str], **kwargs): + super().__init__(**kwargs) + self.index_config = index_config + self.query_langs = query_langs + + @property + def json(self): + """Generate the JSON specific to our requests""" + # let's use the normal way to generate the json + query = super().json + # but modify request if we are on default field + if self.field == DEFAULT_FIELD_MARKER: + query = self._transform_query(query) + return query + + +class EFullTextPhrase(EPhrase, FullTextMixin): + """Item that generates a multi_match for phrase on default field""" + + def __init__(self, index_config: IndexConfig, 
query_langs=list[str], **kwargs): + super().__init__(**kwargs) + self.index_config = index_config + self.query_langs = query_langs + + @property + def json(self): + """Generate the JSON specific to our requests""" + # let's use the normal way to generate the json + query = super().json + # but modify request depending on the request type + if self.field == DEFAULT_FIELD_MARKER: + query = self._transform_query(query) + return query + + +class FullTextQueryBuilder(ElasticsearchQueryBuilder): + """We have our own ESQueryBuilder, + just to be able to use our FullTextItemFactory, + instead of the default ElasticSearchItemFactory + """ + + def __init__(self, **kwargs): + # sanity check, before overriding below + if "default_field" in kwargs: + raise NotImplementedError("You should not override default_field") + super().__init__( + # we put a specific marker on default_field + # because we want to be sure we recognize them + default_field=DEFAULT_FIELD_MARKER, + **kwargs, + ) + + # fix until https://github.com/jurismarches/luqum/issues/106 is resolved + def _get_operator_extract(self, binary_operation, delta=8): + try: + return super()._get_operator_extract(binary_operation, delta) + except IndexError: + return str(binary_operation) + + def visit_word(self, node, context): + """Specialize the query corresponding to word, + in the case of full text search + """ + fields = self._fields(context) + if fields == [DEFAULT_FIELD_MARKER]: + # we are in a full text query + # it's analyzed, don't bother with term + method = "match_phrase" if self.match_word_as_phrase else "match" + yield self.es_item_factory.build( + EFullTextWord, + q=node.value, + method=method, + # we keep fields, we will deal with it in EFullTextWord + fields=fields, + _name=self.get_name(node, context), + index_config=context["index_config"], + query_langs=context["query_langs"], + ) + else: + yield from super().visit_word(node, context) + + def visit_phrase(self, node, context): + """Specialize the query 
corresponding to phrase, + in the case of full text search + """ + fields = self._fields(context) + if fields == [DEFAULT_FIELD_MARKER]: + # we are in a full text query + # we know it's analyzed, don't bother with term + yield self.es_item_factory.build( + EFullTextPhrase, + phrase=node.value, + fields=self._fields(context), + _name=self.get_name(node, context), + index_config=context["index_config"], + query_langs=context["query_langs"], + ) + else: + yield from super().visit_phrase(node, context) + + def __call__( + self, tree: luqum.tree.Item, index_config: IndexConfig, query_langs: list[str] + ) -> JSONType: + """We add two parameters: + + :param index_config: the index config we are working on + :param query_langs: the target languages of current query + """ + self.nesting_checker(tree) + # we add our parameters to the context + context = {"index_config": index_config, "query_langs": query_langs} + elastic_tree = self.visit(tree, context) + return elastic_tree[0].json diff --git a/app/exceptions.py b/app/exceptions.py new file mode 100644 index 00000000..b338a271 --- /dev/null +++ b/app/exceptions.py @@ -0,0 +1,35 @@ +class QueryAnalysisError(Exception): + """Exception while building a query.""" + + +class InvalidLuceneQueryError(QueryAnalysisError): + """Invalid query, can't be analyzed by luqum""" + + +class FreeWildCardError(QueryAnalysisError): + """You can't use '*' alone without specifying a search field""" + + +class UnknownFieldError(QueryAnalysisError): + """An unknown field name was used in the query""" + + +class UnknownScriptError(QueryAnalysisError): + """An unknown script name was used in the query""" + + +class QueryCheckError(QueryAnalysisError): + """Encountered errors while checking Query""" + + def __init__(self, *args, errors: list[str]): + super().__init__(*args) + self.errors = errors + + def __str__(self): + errors = "\n - " + "\n - ".join(self.errors) + return f"{', '.join(self.args)}: {errors}" + + def __repr__(self): + return ( + 
f"{self.__class__.__name__}({', '.join(self.args)}, errors={self.errors})" + ) diff --git a/app/facets.py b/app/facets.py index 4b633a62..d7df76ad 100644 --- a/app/facets.py +++ b/app/facets.py @@ -17,6 +17,13 @@ def _get_translations( lang: str, items: list[tuple[str, str]], index_config: config.IndexConfig ) -> dict[tuple[str, str], str]: + """Get translations for a list of items + + :param lang: target language + :param items: list of (entry id, field_name) + :param index_config: the index configuration + :return: a dict mapping (id, field_name) to the translation + """ # go from field_name to taxonomy field_names = set([field_name for _, field_name in items]) field_taxonomy: dict[str, str] = { @@ -25,7 +32,7 @@ def _get_translations( for field_name in field_names if index_config.fields[field_name].taxonomy_name } - # fetch items names + # fetch items names within a single query items_to_fetch = [ (id, field_taxonomy[field_name]) for id, field_name in items @@ -35,24 +42,24 @@ def _get_translations( # compute best translations translations: dict[tuple[str, str], str] = {} for id, field_name in items: - item_translations = None + item_translation = None names = ( items_names.get((id, field_taxonomy[field_name])) if field_name in field_taxonomy else None ) if names: - item_translations = names.get(lang, None) + item_translation = names.get(lang, None) # fold back to main language for item - if not item_translations: + if not item_translation: main_lang = id.split(":", 1)[0] - item_translations = names.get(main_lang, None) + item_translation = names.get(main_lang, None) # fold back to english - if not translations: - item_translations = names.get("en", None) + if not item_translation: + item_translation = names.get("en", None) # eventually translate - if item_translations: - translations[(id, field_name)] = item_translations[0] + if item_translation: + translations[(id, field_name)] = item_translation return translations diff --git a/app/indexing.py 
b/app/indexing.py index 2c48e32d..9d09f776 100644 --- a/app/indexing.py +++ b/app/indexing.py @@ -14,11 +14,14 @@ FieldType, IndexConfig, TaxonomyConfig, - TaxonomySourceConfig, ) -from app.taxonomy import get_taxonomy from app.utils import load_class_object_from_string -from app.utils.analyzers import AUTOCOMPLETE_ANALYZERS +from app.utils.analyzers import ( + get_autocomplete_analyzer, + get_taxonomy_indexing_analyzer, + get_taxonomy_search_analyzer, + number_of_fields, +) FIELD_TYPE_TO_DSL_TYPE = { FieldType.keyword: dsl_field.Keyword, @@ -37,46 +40,46 @@ def generate_dsl_field( - field: FieldConfig, supported_langs: Iterable[str], taxonomy_langs: Iterable[str] + field: FieldConfig, supported_langs: Iterable[str] ) -> dsl_field.Field: """Generate Elasticsearch DSL field from a FieldConfig. + This will be used to generate the Elasticsearch mapping. + + This is an important part, because it will define the behavior of each field. + :param field: the field to use as input :param supported_langs: an iterable of languages (2-letter codes), - used to know which sub-fields to create for `text_lang` field types - :param taxonomy_langs: an iterabl of languages (2-letter codes), - used to know which sub-fields to create for `taxonomy` field types + used to know which sub-fields to create for `text_lang` + and `taxonomy` field types :return: the elasticsearch_dsl field """ if field.type is FieldType.taxonomy: - # in `other`, we store the text of all languages that don't have a - # built-in ES analyzer. 
By using a single field, we don't create as - # many subfields as there are supported languages - properties = {"other": dsl_field.Text(analyzer=analyzer("standard"))} - for lang in taxonomy_langs: - if lang in ANALYZER_LANG_MAPPING: - properties[lang] = dsl_field.Text( - analyzer=analyzer(ANALYZER_LANG_MAPPING[lang]) - ) - return dsl_field.Object( - required=field.required, dynamic=False, properties=properties - ) - + # We will store the taxonomy identifier as keyword + # And also store it in subfields with query analyzers for each language, + # that will activate synonyms and specific normalizations + if field.taxonomy_name is None: + raise ValueError("Taxonomy field must have a taxonomy_name set in config") + sub_fields = { + lang: dsl_field.Text( + # we almost use keyword analyzer as we really map synonyms to a keyword + analyzer=get_taxonomy_indexing_analyzer(field.taxonomy_name, lang), + # but on query we need to fold and match with synonyms + search_analyzer=get_taxonomy_search_analyzer( + field.taxonomy_name, lang, with_synonyms=True + ), + ) + for lang in supported_langs + } + return dsl_field.Keyword(required=field.required, fields=sub_fields) elif field.type is FieldType.text_lang: properties = { - # we use `other` field for the same reason as for the `taxonomy` - # type - "other": dsl_field.Text(analyzer=analyzer("standard")), - # Add subfield used to save main language version for `text_lang` - "main": dsl_field.Text(analyzer=analyzer("standard")), + lang: dsl_field.Text( + analyzer=analyzer(ANALYZER_LANG_MAPPING.get(lang, "standard")), + ) + for lang in supported_langs } - for lang in supported_langs: - if lang in ANALYZER_LANG_MAPPING: - properties[lang] = dsl_field.Text( - analyzer=analyzer(ANALYZER_LANG_MAPPING[lang]) - ) return dsl_field.Object(dynamic=False, properties=properties) - elif field.type == FieldType.object: return dsl_field.Object(dynamic=True) elif field.type == FieldType.disabled: @@ -170,12 +173,7 @@ def process_text_lang_field( 
else: # here key is the lang 2-letters code key = target_field.rsplit(lang_separator, maxsplit=1)[-1] - # Here we check whether the language is supported, otherwise - # we use the default "other" field, that aggregates texts - # from all unsupported languages - # it's the only subfield that is a list instead of a string if key not in supported_langs: - field_input.setdefault("other", []).append(input_value) continue field_input[key] = input_value @@ -188,82 +186,25 @@ def process_taxonomy_field( field: FieldConfig, taxonomy_config: TaxonomyConfig, split_separator: str, - taxonomy_langs: set[str], ) -> JSONType | None: """Process data for a `taxonomy` field type. - Generates a dict ready to be indexed by Elasticsearch, with a subfield for - each language. Two other subfields are added: - - - `original`: the original value of the field. For example, if the field - name is `categories` and `categories` already exist in the document, - we will save its value in the `original` subfield. This subfield is - only added if the field is present in the input data. - - - `other`: the value of the field for languages that are not supported by - the project (no elasticsearch specific analyzers) + There is not much to be done here, + as the magic of synonyms etc. happens by ES itself, + thanks to our mapping definition, + and a bit at query time. :param data: input data, as a dict :param field: the field config - :param taxonomy_config: the taxonomy config :param split_separator: the separator used to split the input field value, in case of multi-valued input (if `field.split` is True) - :param taxonomy_langs: a set of supported languages (2-letter codes), used - to know which sub-fields to create. 
- :return: the processed data, as a dict + :return: the processed value """ - field_input: JSONType = {} input_field = field.get_input_field() input_value = preprocess_field_value( data, input_field, split=field.split, split_separator=split_separator ) - if input_value is None: - return None - - taxonomy_sources_by_name = { - source.name: source for source in taxonomy_config.sources - } - taxonomy_source_config: TaxonomySourceConfig = taxonomy_sources_by_name[ - field.taxonomy_name # type: ignore - ] - taxonomy = get_taxonomy( - taxonomy_source_config.name, str(taxonomy_source_config.url) - ) - - # to know in which language we should translate the tags using the - # taxonomy, we use: - # - the language list defined in the taxonomy config: for every item, we - # translate the tags for this list of languages - # - a custom list of supported languages for the item (`taxonomy_langs` - # field), this is used to allow indexing tags for an item that is available - # in specific countries - langs = taxonomy_langs | set(data.get("taxonomy_langs", [])) - for lang in langs: - for single_tag in input_value: - if single_tag not in taxonomy: - continue - - node = taxonomy[single_tag] - values = {node.get_localized_name(lang)} - - if field.add_taxonomy_synonyms: - values |= set(node.get_synonyms(lang)) - - # Add international version of the name - if "xx" in node.names: - values |= set(node.get_synonyms("xx")) - - for value in values: - if value is not None: - # If language is not supported (=no elasticsearch specific - # analyzers), we store the data in a "other" field - key = lang if lang in ANALYZER_LANG_MAPPING else "other" - field_input.setdefault(key, []).append(value) - - if field.name in data: - field_input["original"] = data[field.name] - - return field_input if field_input else None + return input_value if input_value else None class DocumentProcessor: @@ -273,8 +214,7 @@ class DocumentProcessor: def __init__(self, config: IndexConfig) -> None: self.config = config - 
self.supported_langs = config.get_supported_langs() - self.taxonomy_langs = config.get_taxonomy_langs() + self.supported_langs_set = config.supported_langs_set self.preprocessor: BaseDocumentPreprocessor | None if config.preprocessor is not None: @@ -283,6 +223,47 @@ def __init__(self, config: IndexConfig) -> None: else: self.preprocessor = None + def inputs_from_data(self, id_, processed_data: JSONType) -> JSONType: + """Generate a dict with the data to be indexed in ES""" + inputs = { + "last_indexed_datetime": datetime.datetime.utcnow().isoformat(), + "_id": id_, + } + for field in self.config.fields.values(): + input_field = field.get_input_field() + + if field.type == FieldType.text_lang: + # dispatch languages in a sub-dictionary + field_input = process_text_lang_field( + processed_data, + input_field=field.get_input_field(), + split=field.split, + lang_separator=self.config.lang_separator, + split_separator=self.config.split_separator, + supported_langs=self.supported_langs_set, + ) + # nothing to do, all the magic of subfield is done thanks to ES + elif field.type == FieldType.taxonomy: + field_input = process_taxonomy_field( + data=processed_data, + field=field, + taxonomy_config=self.config.taxonomy, + split_separator=self.config.split_separator, + ) + + else: + field_input = preprocess_field_value( + processed_data, + input_field, + split=field.split, + split_separator=self.config.split_separator, + ) + + if field_input: + inputs[field.name] = field_input + + return inputs + + def from_result(self, result: FetcherResult) -> FetcherResult: """Generate an item ready to be indexed by elasticsearch-dsl from a fetcher result. 
@@ -325,87 +306,72 @@ def from_result(self, result: FetcherResult) -> FetcherResult: processed_data = processed_result.document - inputs = { - "last_indexed_datetime": datetime.datetime.utcnow().isoformat(), - "_id": _id, - } - for field in self.config.fields.values(): - input_field = field.get_input_field() - - if field.type == FieldType.text_lang: - field_input = process_text_lang_field( - processed_data, - input_field=field.get_input_field(), - split=field.split, - lang_separator=self.config.lang_separator, - split_separator=self.config.split_separator, - supported_langs=self.supported_langs, - ) - - elif field.type == FieldType.taxonomy: - field_input = process_taxonomy_field( - data=processed_data, - field=field, - taxonomy_config=self.config.taxonomy, - split_separator=self.config.split_separator, - taxonomy_langs=self.taxonomy_langs, - ) - - else: - field_input = preprocess_field_value( - processed_data, - input_field, - split=field.split, - split_separator=self.config.split_separator, - ) - - if field_input: - inputs[field.name] = field_input + inputs = self.inputs_from_data(_id, processed_data) return FetcherResult(status=processed_result.status, document=inputs) def generate_mapping_object(config: IndexConfig) -> Mapping: + """ES Mapping for project index, that will contain the data""" mapping = Mapping() supported_langs = config.supported_langs - taxonomy_langs = config.taxonomy.exported_langs + # note: when we reference new analyzers in the mapping as analyzers objects, + # Elasticsearch DSL will reference them in the analyzer section by itself for field in config.fields.values(): mapping.field( field.name, - generate_dsl_field( - field, supported_langs=supported_langs, taxonomy_langs=taxonomy_langs - ), + generate_dsl_field(field, supported_langs=supported_langs), ) # date of last index for the purposes of search + # this is a field internal to Search-a-licious and independent of the project mapping.field("last_indexed_datetime", 
dsl_field.Date(required=True)) return mapping def generate_index_object(index_name: str, config: IndexConfig) -> Index: + """Index configuration for project index, that will contain the data""" index = Index(index_name) - index.settings( - number_of_shards=config.index.number_of_shards, - number_of_replicas=config.index.number_of_replicas, - ) + settings = { + "number_of_shards": config.index.number_of_shards, + "number_of_replicas": config.index.number_of_replicas, + } mapping = generate_mapping_object(config) + num_fields = number_of_fields(mapping) + # add 25% margin + num_fields = int(num_fields * 1.25) + if num_fields > 1000: + # default limit is 1000 fields, set a specific one + settings["index.mapping.total_fields.limit"] = num_fields + index.settings(**settings) index.mapping(mapping) return index def generate_taxonomy_mapping_object(config: IndexConfig) -> Mapping: + """ES Mapping for indexes containing taxonomies entries""" mapping = Mapping() supported_langs = config.supported_langs mapping.field("id", dsl_field.Keyword(required=True)) mapping.field("taxonomy_name", dsl_field.Keyword(required=True)) mapping.field( - "names", + "name", + dsl_field.Object( + required=True, + dynamic=False, + properties={ + lang: dsl_field.Keyword(required=False) for lang in supported_langs + }, + ), + ), + mapping.field( + "synonyms", dsl_field.Object( required=True, dynamic=False, properties={ lang: dsl_field.Completion( - analyzer=AUTOCOMPLETE_ANALYZERS.get(lang, "simple"), + analyzer=get_autocomplete_analyzer(lang), contexts=[ { "name": "taxonomy_name", @@ -422,6 +388,9 @@ def generate_taxonomy_mapping_object(config: IndexConfig) -> Mapping: def generate_taxonomy_index_object(index_name: str, config: IndexConfig) -> Index: + """ + Index configuration for indexes containing taxonomies entries + """ index = Index(index_name) taxonomy_index_config = config.taxonomy.index index.settings( diff --git a/app/openfoodfacts.py b/app/openfoodfacts.py index a319aff8..1a4adc6e 
100644 --- a/app/openfoodfacts.py +++ b/app/openfoodfacts.py @@ -9,7 +9,6 @@ from app._types import FetcherResult, FetcherStatus, JSONType from app.indexing import BaseDocumentPreprocessor from app.postprocessing import BaseResultProcessor -from app.taxonomy import get_taxonomy from app.utils.download import http_session from app.utils.log import get_logger @@ -121,42 +120,29 @@ def fetch_document(self, stream_name: str, item: JSONType) -> FetcherResult: class DocumentPreprocessor(BaseDocumentPreprocessor): + def preprocess(self, document: JSONType) -> FetcherResult: # no need to have a deep-copy here document = copy.copy(document) # convert obsolete field into bool document["obsolete"] = bool(document.get("obsolete")) - document["taxonomy_langs"] = self.get_taxonomy_langs(document) + # add "main" language to text_lang fields + self.add_main_language(document) # Don't keep all nutriment values self.select_nutriments(document) return FetcherResult(status=FetcherStatus.FOUND, document=document) - def get_taxonomy_langs(self, document: JSONType) -> list[str]: - # We add `taxonomy_langs` field to index taxonomized fields in - # the language of the product. To determine the list of - # `taxonomy_langs`, we check: - # - `languages_code` - # - `countries_tags`: we add every official language of the countries - # where the product can be found. 
- taxonomy_langs = set(document.get("languages_codes", [])) - countries_tags = document.get("countries_tags", []) - country_taxonomy = get_taxonomy("country", COUNTRIES_TAXONOMY_URL) - - for country_tag in countries_tags: - # Check that `country_tag` is in taxonomy - if (country_node := country_taxonomy[country_tag]) is not None: - # Get all official languages of the country, and add them to - # `taxonomy_langs` - if ( - lang_codes := country_node.properties.get("language_codes", {}).get( - "en" - ) - ) is not None: - taxonomy_langs |= set( - lang_code for lang_code in lang_codes.split(",") if lang_code - ) + def add_main_language(self, document: JSONType) -> None: + """We add a "main" language to translated fields (text_lang and taxonomies) - return list(taxonomy_langs) + This enables searching in the main language of the product. + This is important because most of the time, + products have no entry for a lot of language, + so this is an interesting fall-back. + """ + for field in self.config.text_lang_fields: + if field in document: + document[field + "_main"] = document[field] def select_nutriments(self, document: JSONType): """Only selected interesting nutriments, as there are hundreds of @@ -191,6 +177,9 @@ def process_after(self, result: JSONType) -> JSONType: @staticmethod def build_image_fields(product: JSONType): + """Images are stored in a weird way in Open Food Facts, + We want to make it far more simple to use in results. 
+ """ # Python copy of the code from # https://github.com/openfoodfacts/openfoodfacts-server/blob/b297ed858d526332649562cdec5f1d36be184984/lib/ProductOpener/Display.pm#L10128 code = product["code"] diff --git a/app/postprocessing.py b/app/postprocessing.py index 890f1fdb..40d0f87a 100644 --- a/app/postprocessing.py +++ b/app/postprocessing.py @@ -1,7 +1,7 @@ from elasticsearch_dsl.response import Response from app._types import JSONType -from app.config import FieldType, IndexConfig +from app.config import IndexConfig from app.utils import load_class_object_from_string @@ -10,6 +10,9 @@ def __init__(self, config: IndexConfig) -> None: self.config = config def process(self, response: Response, projection: set[str] | None) -> JSONType: + """Post process results to add some information, + or transform results to flatten them + """ output = { "took": response.took, "timed_out": response.timed_out, @@ -21,17 +24,16 @@ def process(self, response: Response, projection: set[str] | None) -> JSONType: result = hit.to_dict() result["_score"] = hit.meta.score - for field in self.config.fields.values(): - if field.name not in result: + # TODO make it an unsplit option or move to specific off post processing + for fname in self.config.text_lang_fields: + if fname not in result: continue - - if field.type is FieldType.text_lang: - lang_values = result.pop(field.name) - for lang, text in lang_values.items(): - suffix = "" if lang == "main" else f"_{lang}" - result[f"{field.name}{suffix}"] = text - elif field.type is FieldType.taxonomy: - result[field.name] = result.pop(field.name)["original"] + # Flatten the language dict + lang_values = result.pop(fname) + for lang, text in lang_values.items(): + # FIXME: this reproduces OFF behaviour, but is this a good thing? 
+ suffix = "" if lang == "main" else f"_{lang}" + result[f"{fname}{suffix}"] = text result = self.process_after(result) if projection: diff --git a/app/query.py b/app/query.py index 66fa7968..b5299902 100644 --- a/app/query.py +++ b/app/query.py @@ -1,13 +1,13 @@ import elastic_transport import elasticsearch import luqum.exceptions -from elasticsearch_dsl import A, Q, Search +from elasticsearch_dsl import A, Search from elasticsearch_dsl.aggs import Agg -from elasticsearch_dsl.query import Query from luqum import tree -from luqum.elasticsearch import ElasticsearchQueryBuilder from luqum.elasticsearch.schema import SchemaAnalyzer +from luqum.elasticsearch.visitor import ElasticsearchQueryBuilder from luqum.parser import parser +from luqum.utils import OpenRangeTransformer, UnknownOperationResolver from ._types import ( ErrorSearchResponse, @@ -20,9 +20,16 @@ SuccessSearchResponse, ) from .config import FieldType, IndexConfig +from .es_query_builder import FullTextQueryBuilder from .es_scripts import get_script_id +from .exceptions import InvalidLuceneQueryError, QueryCheckError, UnknownScriptError from .indexing import generate_index_object from .postprocessing import BaseResultProcessor +from .query_transformers import ( + LanguageSuffixTransformer, + PhraseBoostTransformer, + QueryCheck, +) from .utils import get_logger, str_utils logger = get_logger(__name__) @@ -35,146 +42,27 @@ def build_elasticsearch_query_builder(config: IndexConfig) -> ElasticsearchQuery options = SchemaAnalyzer(index.to_dict()).query_builder_options() # we default to a AND between terms that are just space separated options["default_operator"] = ElasticsearchQueryBuilder.MUST - return ElasticsearchQueryBuilder(**options) - - -def build_query_clause(query: str, langs: list[str], config: IndexConfig) -> Query: - fields = [] - supported_langs = config.get_supported_langs() - taxonomy_langs = config.get_taxonomy_langs() - match_phrase_boost_queries = [] - - for field in config.fields.values(): 
- # We don't include all fields in the multi-match clause, only a subset - # of them - if field.full_text_search: - if field.type in (FieldType.taxonomy, FieldType.text_lang): - # language subfields are not the same depending on whether the - # field is a `taxonomy` or a `text_lang` field - langs_subset = frozenset( - supported_langs - if field.type is FieldType.text_lang - else taxonomy_langs - ) - field_match_phrase_boost_queries = [] - for lang in (_lang for _lang in langs if _lang in langs_subset): - subfield_name = f"{field.name}.{lang}" - fields.append(subfield_name) - field_match_phrase_boost_queries.append( - Q( - "match_phrase", - **{ - subfield_name: { - "query": query, - "boost": config.match_phrase_boost, - } - }, - ) - ) - if len(field_match_phrase_boost_queries) == 1: - match_phrase_boost_queries.append( - field_match_phrase_boost_queries[0] - ) - elif len(field_match_phrase_boost_queries) > 1: - match_phrase_boost_queries.append( - Q("bool", should=field_match_phrase_boost_queries) - ) - - else: - fields.append(field.name) - match_phrase_boost_queries.append( - Q( - "match_phrase", - **{ - field.name: { - "query": query, - "boost": config.match_phrase_boost, - } - }, - ) - ) - - multi_match_query = Q("multi_match", query=query, fields=fields) - - if match_phrase_boost_queries: - multi_match_query |= Q("bool", should=match_phrase_boost_queries) - - return multi_match_query + # remove default_field + options.pop("default_field", None) + return FullTextQueryBuilder(**options) def parse_query(q: str | None) -> QueryAnalysis: """Begin query analysis by parsing the query.""" analysis = QueryAnalysis(text_query=q) - if q is None: + if q is None or not q.strip(): return analysis try: analysis.luqum_tree = parser.parse(q) + # FIXME: resolve UnknownFilter (to AND) except ( luqum.exceptions.ParseError, luqum.exceptions.InconsistentQueryException, ) as e: - # if the lucene syntax is invalid, consider the query as plain text - logger.warning("parsing error for 
query: '%s':\n%s", q, e) - analysis.luqum_tree = None + raise InvalidLuceneQueryError("Request could not be analyzed by luqum") from e return analysis -def decompose_query( - q: QueryAnalysis, filter_query_builder: ElasticsearchQueryBuilder -) -> QueryAnalysis: - """Decompose the query into two parts: - - - a Lucene DSL query, which is used as a filter clause in the - Elasticsearch query. Luqum library is used to transform the - Lucene DSL into Elasticsearch DSL. - - remaining terms, used for full text search. - - :param q: the user query - :param filter_query_builder: Luqum query builder - :return: a tuple containing the Elasticsearch filter clause and - the remaining terms for full text search - """ - if q.text_query is None: - return q - remaining_terms = "" - if q.luqum_tree is not None: - # Successful parsing - logger.debug("parsed luqum tree: %s", repr(q.luqum_tree)) - word_children = [] - filter_children = [] - if isinstance(q.luqum_tree, (tree.UnknownOperation, tree.AndOperation)): - for child in q.luqum_tree.children: - if isinstance(child, tree.Word): - word_children.append(child) - else: - filter_children.append(child) - elif isinstance(q.luqum_tree, tree.Word): - # the query single term - word_children.append(q.luqum_tree) - else: - filter_children.append(q.luqum_tree) - # We join with space every non word not recognized by the parser - remaining_terms = " ".join(item.value for item in word_children) - filter_tree = None - if filter_children: - # Note: we always wrap in AndOperation, - # even if only one, to be consistent - filter_tree = tree.AndOperation(*filter_children) - - # remove harvested words - logger.debug("filter luqum tree: %s", repr(filter_tree)) - if filter_tree: - filter_query = filter_query_builder(filter_tree) - else: - filter_query = None - logger.debug("filter query from luqum: '%s'", filter_query) - else: - filter_query = None - remaining_terms = q.text_query - - return q.clone(fulltext=remaining_terms, filter_query=filter_query) - - 
def compute_facets_filters(q: QueryAnalysis) -> QueryAnalysis: """Extract facets filters from the query @@ -264,7 +152,7 @@ def parse_sort_by_script( operator, sort_by = str_utils.split_sort_by_sign(sort_by) script = (config.scripts or {}).get(sort_by) if script is None: - raise ValueError(f"Unknown script '{sort_by}'") + raise UnknownScriptError(f"Unknown script '{sort_by}'") script_id = get_script_id(index_id, sort_by) # join params and static params script_params = dict((params or {}), **(script.static_params or {})) @@ -297,45 +185,110 @@ def create_aggregation_clauses( return clauses +def add_languages_suffix( + analysis: QueryAnalysis, langs: list[str], config: IndexConfig +) -> QueryAnalysis: + """Add correct languages suffixes to fields of type text_lang or taxonomy + + This matches in one language OR another + """ + if analysis.luqum_tree is None: + return analysis + transformer = LanguageSuffixTransformer( + lang_fields=set(config.lang_fields), langs=langs + ) + analysis.luqum_tree = transformer.visit(analysis.luqum_tree) + return analysis + + +def resolve_unknown_operation(analysis: QueryAnalysis) -> QueryAnalysis: + """Resolve unknown operations in the query to an AND""" + if analysis.luqum_tree is None: + return analysis + transformer = UnknownOperationResolver(resolve_to=tree.AndOperation) + analysis.luqum_tree = transformer.visit(analysis.luqum_tree) + return analysis + + +def boost_phrases( + analysis: QueryAnalysis, boost: float, proximity: int | None +) -> QueryAnalysis: + """Boost all phrases in the query""" + if analysis.luqum_tree is None: + return analysis + transformer = PhraseBoostTransformer(boost=boost, proximity=proximity) + analysis.luqum_tree = transformer.visit(analysis.luqum_tree) + return analysis + + +def check_query(params: SearchParameters, analysis: QueryAnalysis) -> None: + """Run some sanity checks on the luqum query""" + if analysis.luqum_tree is None: + return + checker = QueryCheck(index_config=params.index_config, zeal=1) + 
errors = checker.errors(analysis.luqum_tree) + if errors: + raise QueryCheckError("Found errors while checking query", errors=errors) + + +def resolve_open_ranges(analysis: QueryAnalysis) -> QueryAnalysis: + """We need to resolve open ranges to closed ranges + before using elasticsearch query builder""" + if analysis.luqum_tree is None: + return analysis + transformer = OpenRangeTransformer() + analysis.luqum_tree = transformer.visit(analysis.luqum_tree) + return analysis + + + def build_search_query( params: SearchParameters, - filter_query_builder: ElasticsearchQueryBuilder, + es_query_builder: ElasticsearchQueryBuilder, ) -> QueryAnalysis: """Build an elasticsearch_dsl Query. - :param q: the user raw query - :param langs: the set of languages we want to support, it is used to - select language subfields for some field types - :param size: number of results to return - :param page: requested page (starts at 1). - :param config: the index configuration to use - :param filter_query_builder: luqum elasticsearch query builder - :param sort_by: sorting key, defaults to None (=relevance-based sorting) + :param params: SearchParameters containing all search parameters + :param es_query_builder: the builder to transform + the luqum tree to an elasticsearch query :return: the built Search query """ analysis = parse_query(params.q) - analysis = decompose_query(analysis, filter_query_builder) analysis = compute_facets_filters(analysis) + analysis = resolve_unknown_operation(analysis) + analysis = resolve_open_ranges(analysis) + if params.boost_phrase and params.sort_by is None: + analysis = boost_phrases( + analysis, + params.index_config.match_phrase_boost, + params.index_config.match_phrase_boost_proximity, + ) + # add languages for localized fields + analysis = add_languages_suffix(analysis, params.langs, params.index_config) + # we are at a good point to check the query + check_query(params, analysis) - logger.debug("filter query: %s", analysis.filter_query) - 
logger.debug("remaining terms: '%s'", analysis.fulltext) + logger.debug("luqum query: %s", analysis.luqum_tree) - return build_es_query(analysis, params) + return build_es_query(analysis, params, es_query_builder) def build_es_query( - q: QueryAnalysis, + analysis: QueryAnalysis, params: SearchParameters, + es_query_builder: ElasticsearchQueryBuilder, ) -> QueryAnalysis: config = params.index_config es_query = Search(index=config.index.name) - - if q.fulltext: - base_multi_match_q = build_query_clause(q.fulltext, params.langs, config) - es_query = es_query.query(base_multi_match_q) - - if q.filter_query: - es_query = es_query.query("bool", filter=q.filter_query) + # main query + if analysis.luqum_tree is not None: + try: + es_query = es_query.query( + es_query_builder(analysis.luqum_tree, params.index_config, params.langs) + ) + except luqum.exceptions.InconsistentQueryException as e: + raise InvalidLuceneQueryError( + "Request could not be transformed by luqum" + ) from e agg_fields = set(params.facets) if params.facets is not None else set() if params.charts is not None: @@ -343,7 +296,7 @@ def build_es_query( [ chart.field for chart in params.charts - if chart.chart_type == "DistributionChartType" + if chart.chart_type == "DistributionChart" ] ) for agg_name, agg in create_aggregation_clauses(config, agg_fields).items(): @@ -363,7 +316,7 @@ def build_es_query( size=params.page_size, from_=params.page_size * (params.page - 1), ) - return q.clone(es_query=es_query) + return analysis.clone(es_query=es_query) def build_completion_query( @@ -386,7 +339,7 @@ def build_completion_query( """ completion_clause = { - "field": f"names.{lang}", + "field": f"synonyms.{lang}", "size": size, "contexts": {"taxonomy_name": taxonomy_names}, } diff --git a/app/query_transformers.py b/app/query_transformers.py new file mode 100644 index 00000000..051eb26a --- /dev/null +++ b/app/query_transformers.py @@ -0,0 +1,218 @@ +import re + +import luqum.check +import luqum.visitor +from 
luqum import tree + +from .config import IndexConfig + + +class LanguageSuffixTransformer(luqum.visitor.TreeTransformer): + """This transformer adds a language suffix to lang_fields fields, + for any languages in langs (the languages we want to query on). + + That is `field1:something` will become + `field1:en:something OR field1:fr:something` + + Note: we do this only for the query parts that have a search field, + the text search without specifying a field + is handled by the ElasticSearch query builder + """ + + def __init__(self, lang_fields=set[str], langs=list[str], **kwargs): + # we need to track parents to get full field name + super().__init__(track_parents=True, track_new_parents=False, **kwargs) + self.langs = langs + self.lang_fields = lang_fields + + def visit_search_field(self, node, context): + """As we reach a search_field, + if it's one that have a lang, + we replace single expression with a OR on sub-language fields + """ + # FIXME: verify again the way luqum work on this side ! 
+ field_name = node.name + # add eventual parents + prefix = ".".join( + node.name + for node in context.get("parents", ()) + if isinstance(node, tree.SearchField) + ) + if prefix: + field_name = f"{prefix}.{field_name}" + # is it a lang dependant field + if field_name in self.lang_fields: + # create a new expression for each languages + new_nodes = [] + for lang in self.langs: + # note: we don't have to care about having searchfield in children + # because only complete field_name would match a self.lang_fields + (new_node,) = self.generic_visit(node, context) + # add language prefix + new_node.name = f"{new_node.name}.{lang}" + new_nodes.append(new_node) + if len(new_nodes) > 1: + yield tree.OrOperation(*new_nodes) + else: + yield from new_nodes + else: + # default + yield from self.generic_visit(node, context) + + +def get_consecutive_words( + node: tree.BoolOperation, +) -> list[list[tuple[int, tree.Word]]]: + """Return a list of list of consecutive words, + with their index, in a bool operation + """ + consecutive: list[list[tuple[int, tree.Word]]] = [[]] + for i, child in enumerate(node.children): + if isinstance(child, tree.Word): + # append to last list + consecutive[-1].append((i, child)) + else: + # we have discontinuity + if len(consecutive[-1]) == 1: + # one term alone is not enough, clear the list + consecutive[-1] = [] + elif consecutive[-1]: + # create a new list + consecutive.append([]) + # remove last list if empty or only one element + if len(consecutive[-1]) <= 1: + consecutive.pop() + return consecutive + + +class PhraseBoostTransformer(luqum.visitor.TreeTransformer): + """This transformer boosts terms that are consecutive + and might be found in a query + + For example if we have `Whole AND Milk AND Cream` + we will boost items containing `"Whole Milk Cream"`, + the new expression will look like + (here with a boost of 2 and proxmity of 3): + `((Whole AND Milk AND Cream^2) OR "Whole Milk Cream"^2.0~3)` + + We also only apply it to terms that 
are not for a specified field. + + Note: It won't work on UnknownOperation, so you'd better resolve them before. + + :param boost: how much to boost consecutive terms + :param proximity: proxmity of the boosted phrase, enable to match with gaps + :param only_free_text: only apply to text without an explicit search field defined + """ + + def __init__( + self, boost: float, proximity: int | None = 1, only_free_text=True, **kwargs + ): + super().__init__(track_parents=True, track_new_parents=False, **kwargs) + # we transform float to str, + # because otherwise decimal.Decimal will make it look weird + self.boost = str(boost) + self.proximity = proximity + self.only_free_text = only_free_text + + def _get_consecutive_words(self, node): + return get_consecutive_words(node) + + def _phrase_boost_from_words(self, words): + """Given a group of words, give the new operation""" + expr = " ".join(word.value for word in words) + expr = f'"{expr}"' + phrase = tree.Phrase(expr) + if self.proximity: + phrase = tree.Proximity(phrase, degree=self.proximity) + phrase = tree.Boost(phrase, force=self.boost, head=" ") + new_expr = tree.Group( + tree.OrOperation(tree.Group(tree.AndOperation(*words), tail=" "), phrase) + ) + # tail and head transfer, to have good looking str + new_expr.head = words[0].head + words[0].head = "" + new_expr.tail = words[-1].tail + words[-1].tail = "" + return new_expr + + def visit_and_operation(self, node, context): + """As we find an OR operation try to boost consecutive word terms""" + # get the or operation with cloned children + (new_node,) = list(super().generic_visit(node, context)) + do_boost_phrases = True + if self.only_free_text: + # we don't do it if a parent is a SearchField + do_boost_phrases = not any( + isinstance(p, tree.SearchField) for p in context.get("parents", ()) + ) + if do_boost_phrases: + # group consecutive terms in AndOperations + consecutive = self._get_consecutive_words(new_node) + if consecutive: + # We have to modify 
children + # by replacing consecutive words with our new expressions. + # We use indexes for that. + new_children = [] + # change first word by the new operation + index_to_change = { + words[0][0]: self._phrase_boost_from_words( + [word[1] for word in words] + ) + for words in consecutive + } + # remove other words that are part of the expression + # (and we will keep the rest) + index_to_remove = set( + word[0] for words in consecutive for word in words[1:] + ) + for i, child in enumerate(new_node.children): + if i in index_to_change: + new_children.append(index_to_change[i]) + elif i not in index_to_remove: + new_children.append(child) + # substitute children of the new node + new_node.children = new_children + yield new_node + + +class QueryCheck(luqum.check.LuceneCheck): + """Sanity checks on luqum request""" + + # TODO: port to luqum + SIMPLE_EXPR_FIELDS = luqum.check.LuceneCheck.SIMPLE_EXPR_FIELDS + ( + tree.Range, + tree.OpenRange, + ) + FIELD_EXPR_FIELDS = SIMPLE_EXPR_FIELDS + (tree.FieldGroup,) + # TODO: shan't luqum should support "." in field names + field_name_re = re.compile(r"^[\w.]+$") + + def __init__(self, index_config: IndexConfig, **kwargs): + super().__init__(**kwargs) + self.index_config = index_config + + # TODO: this should be in LuceneCheck ! 
+ def check_phrase(self, item, parents): + return iter([]) + + def check_open_range(self, item, parents): + return iter([]) + + def check_search_field(self, item, parents): + """Check if the search field is valid""" + yield from super().check_search_field(item, parents) + # might be an inner field get all parents fields + fields = [p.name for p in parents if isinstance(p, tree.SearchField)] + [ + item.name + ] + # join and split to normalize and only have one field + field_names = (".".join(fields)).split(".") + # remove eventual lang suffix + has_lang_suffix = field_names[-1] in self.index_config.supported_langs_set + if has_lang_suffix: + field_names.pop() + is_sub_field = len(field_names) > 1 + # check field exists in config, but only for non sub-field + # (TODO until we implement them in config) + if not is_sub_field and (field_names[0] not in self.index_config.fields): + yield f"Search field '{'.'.join(field_names)}' not found in index config" diff --git a/app/search.py b/app/search.py index ce27eaf1..6a607a55 100644 --- a/app/search.py +++ b/app/search.py @@ -2,7 +2,14 @@ from typing import cast from . 
import config -from ._types import SearchParameters, SearchResponse, SuccessSearchResponse +from ._types import ( + DebugInfo, + QueryAnalysis, + SearchParameters, + SearchResponse, + SearchResponseDebug, + SuccessSearchResponse, +) from .charts import build_charts from .facets import build_facets from .postprocessing import BaseResultProcessor, load_result_processor @@ -10,23 +17,47 @@ logger = logging.getLogger(__name__) -if config.CONFIG is None: - # We want to be able to import api.py (for tests for example) without - # failure, but we add a warning message as it's not expected in a - # production settings - logger.warning("Main configuration is not set, use CONFIG_PATH envvar") - FILTER_QUERY_BUILDERS = {} - RESULT_PROCESSORS = {} -else: - # we cache query builder and result processor here for faster processing - FILTER_QUERY_BUILDERS = { - index_id: build_elasticsearch_query_builder(index_config) - for index_id, index_config in config.CONFIG.indices.items() - } - RESULT_PROCESSORS = { - index_id: load_result_processor(index_config) - for index_id, index_config in config.CONFIG.indices.items() - } + +# we cache query builder and result processor here for faster processing +_ES_QUERY_BUILDERS = {} +_RESULT_PROCESSORS = {} + + +def get_es_query_builder(index_id): + if index_id not in _ES_QUERY_BUILDERS: + index_config = config.get_config().indices[index_id] + _ES_QUERY_BUILDERS[index_id] = build_elasticsearch_query_builder(index_config) + return _ES_QUERY_BUILDERS[index_id] + + +def get_result_processor(index_id): + if index_id not in _RESULT_PROCESSORS: + index_config = config.get_config().indices[index_id] + _RESULT_PROCESSORS[index_id] = load_result_processor(index_config) + return _RESULT_PROCESSORS[index_id] + + +def add_debug_info( + search_result: SuccessSearchResponse, + analysis: QueryAnalysis, + params: SearchParameters, +) -> SearchResponseDebug | None: + if not params.debug_info: + return None + data = {} + for debug_info in params.debug_info: + 
match debug_info: + case DebugInfo.es_query: + data[debug_info.value] = ( + analysis.es_query.to_dict() if analysis.es_query else None + ) + case DebugInfo.lucene_query: + data[debug_info.value] = ( + str(analysis.luqum_tree) if analysis.luqum_tree else None + ) + case DebugInfo.aggregations: + data[debug_info.value] = search_result.aggregations + return SearchResponseDebug(**data) def search( @@ -34,7 +65,7 @@ def search( ) -> SearchResponse: """Run a search""" result_processor = cast( - BaseResultProcessor, RESULT_PROCESSORS[params.valid_index_id] + BaseResultProcessor, get_result_processor(params.valid_index_id) ) logger.debug( "Received search query: q='%s', langs='%s', page=%d, " @@ -50,13 +81,18 @@ def search( index_config = params.index_config query = build_search_query( params, - # filter query builder is generated from elasticsearch mapping and + # ES query builder is generated from elasticsearch mapping and # takes ~40ms to generate, build-it before hand to avoid this delay - filter_query_builder=FILTER_QUERY_BUILDERS[params.valid_index_id], + es_query_builder=get_es_query_builder(params.valid_index_id), ) - logger.debug( - "Elasticsearch query: %s", - query.es_query.to_dict() if query.es_query else query.es_query, + ( + logger.debug( + "Luqum query: %s\nElasticsearch query: %s", + str(query.luqum_tree), + query.es_query.to_dict() if query.es_query else query.es_query, + ) + if logger.isEnabledFor(logging.DEBUG) # avoid processing if no debug + else None ) projection = set(params.fields) if params.fields else None @@ -72,6 +108,7 @@ def search( search_result, query, params.main_lang, index_config, params.facets ) search_result.charts = build_charts(search_result, index_config, params.charts) - # remove aggregations to avoid sending too much information + search_result.debug = add_debug_info(search_result, query, params) + # remove aggregations search_result.aggregations = None return search_result diff --git a/app/taxonomy.py b/app/taxonomy.py index 
6b7b6d05..e1face38 100644 --- a/app/taxonomy.py +++ b/app/taxonomy.py @@ -3,6 +3,7 @@ See also :py:mod:`app.taxonomy_es` """ +from collections.abc import Iterator from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Set, Union @@ -10,7 +11,7 @@ import requests from app._types import JSONType -from app.config import settings +from app.config import TaxonomyConfig, settings from app.utils import get_logger from app.utils.download import download_file, http_session, should_download_file from app.utils.io import load_json @@ -339,6 +340,12 @@ def get_taxonomy( ~/.cache/openfoodfacts/taxonomy :return: a Taxonomy """ + if taxonomy_url.startswith("file://"): + # just use the file, it's already local + fpath = taxonomy_url[len("file://") :] + if not fpath.startswith("/"): + raise RuntimeError("Relative path (not yet) supported for taxonomy url") + return Taxonomy.from_path(fpath.rstrip("/")) filename = f"{taxonomy_name}.json" cache_dir = DEFAULT_CACHE_DIR if cache_dir is None else cache_dir @@ -353,3 +360,10 @@ def get_taxonomy( logger.info("Downloading taxonomy, saving it in %s", taxonomy_path) download_file(taxonomy_url, taxonomy_path) return Taxonomy.from_path(taxonomy_path) + + +def iter_taxonomies(taxonomy_config: TaxonomyConfig) -> Iterator[tuple[str, Taxonomy]]: + for taxonomy_source_config in taxonomy_config.sources: + yield taxonomy_source_config.name, get_taxonomy( + taxonomy_source_config.name, str(taxonomy_source_config.url) + ) diff --git a/app/taxonomy_es.py b/app/taxonomy_es.py index 24361c6e..a8f713e4 100644 --- a/app/taxonomy_es.py +++ b/app/taxonomy_es.py @@ -3,16 +3,24 @@ See also :py:mod:`app.taxonomy` """ +import os +import re +import shutil +from pathlib import Path + from elasticsearch_dsl import Search from elasticsearch_dsl.query import Q from app.config import IndexConfig +from app.taxonomy import Taxonomy, iter_taxonomies +from app.utils import connection +from app.utils.io import safe_replace_dir def 
get_taxonomy_names( items: list[tuple[str, str]], config: IndexConfig, -) -> dict[tuple[str, str], dict[str, list[str]]]: +) -> dict[tuple[str, str], dict[str, str]]: """Given a set of terms in different taxonomies, return their names""" filters = [] for id, taxonomy_name in items: @@ -24,6 +32,88 @@ def get_taxonomy_names( .params(size=len(filters)) ) return { - (result.id, result.taxonomy_name): result.names.to_dict() + (result.id, result.taxonomy_name): result.name.to_dict() for result in query.execute().hits } + + +def _normalize_synonym(token: str) -> str: + """Normalize a synonym, + + It applies the same filter as ES will apply before the synonym filter + to ensure matching tokens + """ + # make lower case + token = token.lower() + # changes anything that is neither a word char nor a space for space + token = re.sub(r"[^\w\s]+", " ", token) + # normalize spaces + token = re.sub(r"\s+", " ", token) + # TODO: should we also run asciifolding or so ? Or depends on language ? + return token + + +def create_synonyms_files(taxonomy: Taxonomy, langs: list[str], target_dir: Path): + """Create a set of files that can be used to define a Synonym Graph Token Filter + + We will match every known synonym in a language + to the identifier of the entry. + We do this because we are not sure which is the main language for an entry. + + Also the special xx language is added to every languages if it exists. 
+ + see: + https://www.elastic.co/guide/en/elasticsearch/reference/current/search-with-synonyms.html#synonyms-store-synonyms-file + """ + + # auto-generate synonyms files for each language, ready to write to + synonyms_paths = {lang: (target_dir / f"{lang}.txt") for lang in langs} + synonyms_files = {lang: fpath.open("w") for lang, fpath in synonyms_paths.items()} + + for node in taxonomy.iter_nodes(): + # we add multi lang synonyms to every language + multi_lang_synonyms = node.synonyms.get("xx", []) + multi_lang_synonyms = [_normalize_synonym(s) for s in multi_lang_synonyms] + # also node id without prefix + multi_lang_synonyms.append(_normalize_synonym(node.id.split(":", 1)[-1])) + multi_lang_synonyms = [s for s in multi_lang_synonyms if s.strip()] + for lang, synonyms in node.synonyms.items(): + if (not synonyms and not multi_lang_synonyms) or lang not in langs: + continue + # avoid commas in synonyms… add multilang syns and identifier without prefix + synonyms_ = (_normalize_synonym(s) for s in synonyms) + synonyms = [s for s in synonyms_ if s.strip()] + synonyms = sorted(set(synonyms + multi_lang_synonyms)) + synonyms = [s for s in synonyms if s.strip()] + if synonyms: + synonyms_files[lang].write(f"{','.join(synonyms)} => {node.id}\n") + + # close files + for f in synonyms_files.values(): + f.close() + + +def create_synonyms(index_config: IndexConfig, target_dir: Path): + for name, taxonomy in iter_taxonomies(index_config.taxonomy): + target = target_dir / name + # a temporary directory, we move at the end + target_tmp = target_dir / f"{name}.tmp" + shutil.rmtree(target_tmp, ignore_errors=True) + # ensure directory + os.makedirs(target_tmp, mode=0o775, exist_ok=True) + # generate synonyms files + create_synonyms_files(taxonomy, index_config.supported_langs, target_tmp) + # move to final location, overriding previous files + safe_replace_dir(target, target_tmp) + # Note: in current deployment, file are shared between ES instance, + # so we don't need to 
replicate the files + + +def refresh_synonyms(index_name: str, index_config: IndexConfig, target_dir: Path): + create_synonyms(index_config, target_dir) + es = connection.current_es_client() + if es.indices.exists(index=index_name): + # trigger update of synonyms in token filters by reloading search analyzers + # and clearing relevant cache + es.indices.reload_search_analyzers(index=index_name) + es.indices.clear_cache(index=index_name, request=True) diff --git a/app/utils/analyzers.py b/app/utils/analyzers.py index f091daba..f89f356d 100644 --- a/app/utils/analyzers.py +++ b/app/utils/analyzers.py @@ -1,15 +1,183 @@ """Defines some analyzers for the elesaticsearch fields.""" -from elasticsearch_dsl import analyzer - -#: An analyzer for the autocomplete field -AUTOCOMPLETE_ANALYZERS = { - "fr": analyzer( - "autocomplete_fr", tokenizer="standard", filter=["lowercase", "asciifolding"] - ), - "de": analyzer( - "autocomplete_de", - tokenizer="standard", - filter=["lowercase", "german_normalization"], - ), +from typing import Optional + +from elasticsearch_dsl import Mapping +from elasticsearch_dsl import analysis as dsl_analysis +from elasticsearch_dsl import analyzer, char_filter, token_filter + +from app._types import JSONType + +# some normalizers existing in ES that are specific to some languages +SPECIAL_NORMALIZERS = { + "ar": "arabic_normalization", + "bn": "bengali_normalization", + "de": "german_normalization", + "hi": "hindi_normalization", + "inc": "indic_normalization", + "fa": "persian_normalization", + "sv": "scandinavian_folding", + "da": "scandinavian_folding", + "no": "scandinavian_folding", + "fi": "scandinavian_folding", + "sr": "serbian_normalization", + "ckb": "sorani_normalization", +} + + +# TODO: this could be provided by the taxonomy / per language +STOP_WORDS = { + "ar": "_arabic_", + "hy": "_armenian_", + "eu": "_basque_", + "bn": "_bengali_", + # "pt_BR": _brazilian_ + "bg": "_bulgarian_", + "ca": "_catalan_", + "ja": "_cjk_", + "zh": 
"_cjk_", + "ko": "_cjk_", + "cs": "_czech_", + "da": "_danish_", + "nl": "_dutch_", + "en": "_english_", + "et": "_estonian_", + "fi": "_finnish_", + "fr": "_french_", + "gl": "_galician_", + "de": "_german_", + "el": "_greek_", + "hi": "_hindi_", + "hu": "_hungarian_", + "id": "_indonesian_", + "ga": "_irish_", + "it": "_italian_", + "lv": "_latvian_", + "lt": "_lithuanian_", + "no": "_norwegian_", + "fa": "_persian_", + "pt": "_portuguese_", + "ro": "_romanian_", + "ru": "_russian_", + "sr": "_serbian_", + # "": "_sorani_", + "es": "_spanish_", + "sv": "_swedish_", + "th": "_thai_", + "tr": "_turkish_", } + + +def get_taxonomy_synonym_filter(taxonomy: str, lang: str) -> dsl_analysis.TokenFilter: + """Return the synonym filter to use for the taxonomized field analyzer""" + return token_filter( + f"synonym_graph_{taxonomy}_{lang}", + type="synonym_graph", + synonyms_path=f"synonyms/{taxonomy}/{lang}.txt", + updateable=True, + ) + + +def get_taxonomy_stop_words_filter( + taxonomy: str, lang: str +) -> Optional[dsl_analysis.TokenFilter]: + """Return the stop words filter to use for the taxonomized field analyzer + + IMPORTANT: de-activated for now ! + If we want to handle them, we have to remove them in synonyms, so we need the list. 
+ """ + stop_words = STOP_WORDS.get(lang) + # deactivate for now + if False and stop_words: + return token_filter( + f"taxonomy_stop_words_{lang}", + type="stop", + stopwords=stop_words, + remove_trailing=True, + ) + return None + + +TAXONOMIES_CHAR_FILTER = char_filter( + "taxonomies_char_filter", + type="mapping", + mappings=[ + # hyphen to underscore + "- => _", + # and escape quotes, so that ES cut words on them + r"' => \\'", + r"’ => \\'", + ], +) + + +def get_taxonomy_indexing_analyzer( + taxonomy: str, lang: str +) -> dsl_analysis.CustomAnalysis: + """We want to index taxonomies terms as keywords (as we only store the id), + but with a specific tweak: transform hyphens into underscores, + """ + # does not really depends on taxonomy and lang + return analyzer( + "taxonomy_indexing", + tokenizer="keyword", + char_filter=[TAXONOMIES_CHAR_FILTER], + ) + + +def get_taxonomy_search_analyzer( + taxonomy: str, lang: str, with_synonyms: bool +) -> dsl_analysis.CustomAnalysis: + """Return the search analyzer to use for the taxonomized field + + :param taxonomy: the taxonomy name + :param lang: the language code + :param with_synonyms: whether to add the synonym filter + """ + # we replace hyphen with underscore + filters: list[str | token_filter] = [ + "lowercase", + ] + stop_words = get_taxonomy_stop_words_filter(taxonomy, lang) + if stop_words: + filters.append(stop_words) + filters.append(SPECIAL_NORMALIZERS.get(lang, "asciifolding")) + if with_synonyms: + filters.append( + get_taxonomy_synonym_filter(taxonomy, lang), + ) + return analyzer( + f"search_{taxonomy}_{lang}", + char_filter=[TAXONOMIES_CHAR_FILTER], + tokenizer="standard", + filter=filters, + ) + + +def get_autocomplete_analyzer(lang: str) -> dsl_analysis.CustomAnalysis: + """Return the search analyzer to use for the autocomplete field""" + return analyzer( + f"autocomplete_{lang}", + tokenizer="standard", + filter=["lowercase", SPECIAL_NORMALIZERS.get(lang, "asciifolding")], + ) + + +def 
number_of_fields(mapping: Mapping | dict[str, JSONType]) -> int: + """Return the number of fields in the mapping""" + count = 0 + properties: dict[str, JSONType] = ( + mapping.to_dict().get("properties", {}) + if isinstance(mapping, Mapping) + else mapping + ) + for field, value in properties.items(): + if isinstance(value, dict): + if props := value.get("properties"): + # object field with properties + count += number_of_fields(props) + if fields := value.get("fields"): + # subfields + count += number_of_fields(fields) + count += 1 + return count diff --git a/app/utils/io.py b/app/utils/io.py index 34a93d2b..bf7859a4 100644 --- a/app/utils/io.py +++ b/app/utils/io.py @@ -1,4 +1,5 @@ import gzip +import shutil from pathlib import Path from typing import Callable, Iterable @@ -54,3 +55,27 @@ def dump_json(path: str | Path, item: JSONType, **kwargs): open_fn = get_open_fn(path) with open_fn(str(path), "wb") as f: f.write(orjson.dumps(item, **kwargs)) + + +def safe_replace_dir(target: Path, new_target: Path): + """Replace a directory atomically""" + # a temporary place for the target dir + old_target = target.with_suffix(target.suffix + ".old") + # move target to old_target + if old_target.exists(): + shutil.rmtree(old_target) + if target.exists(): + shutil.move(target, old_target) + # move our file + try: + shutil.move(new_target, target) + except Exception: + # if something went wrong, we restore the old target + if old_target.exists(): + shutil.move(old_target, target) + # reraise + raise + else: + # cleanup + if old_target.exists(): + shutil.rmtree(old_target) diff --git a/app/validations.py b/app/validations.py index b3b764d7..3164168a 100644 --- a/app/validations.py +++ b/app/validations.py @@ -1,6 +1,4 @@ -from typing import cast - -from .config import CONFIG, Config +from .config import Config, get_config def check_index_id_is_defined(index_id: str | None, config: Config) -> None: @@ -31,7 +29,7 @@ def check_all_values_are_fields_agg( errors: list[str] = [] if 
values is None: return errors - global_config = cast(Config, CONFIG) + global_config = get_config() index_id, index_config = global_config.get_index_config(index_id) if index_config is None: raise ValueError(f"Cannot get index config for index_id {index_id}") @@ -55,7 +53,7 @@ def check_fields_are_numeric( if values is None: return errors - global_config = cast(Config, CONFIG) + global_config = get_config() index_id, index_config = global_config.get_index_config(index_id) if index_config is None: raise ValueError(f"Cannot get index config for index_id {index_id}") diff --git a/data/config/openfoodfacts.yml b/data/config/openfoodfacts.yml index 256e366b..a12d3b73 100644 --- a/data/config/openfoodfacts.yml +++ b/data/config/openfoodfacts.yml @@ -44,19 +44,20 @@ indices: categories: full_text_search: true input_field: categories_tags - taxonomy_name: category + taxonomy_name: categories type: taxonomy + bucket_agg: true labels: full_text_search: true input_field: labels_tags - taxonomy_name: label + taxonomy_name: labels type: taxonomy + bucket_agg: true brands: full_text_search: true - split: true - type: text - brands_tags: - type: keyword + input_field: brands_tags + type: taxonomy + taxonomy_name: brands bucket_agg: true stores: split: true @@ -74,27 +75,22 @@ indices: bucket_agg: true quantity: type: text - categories_tags: - type: keyword - taxonomy_name: category - bucket_agg: true - labels_tags: - type: keyword - taxonomy_name: label - bucket_agg: true - countries_tags: - type: keyword + countries: + type: taxonomy + input_field: countries_tags bucket_agg: true - taxonomy_name: country - states_tags: - type: keyword + taxonomy_name: countries + states: + type: taxonomy + input_field: states_tags bucket_agg: true - taxonomy_name: state + taxonomy_name: states origins_tags: type: keyword - ingredients_tags: - type: keyword - taxonomy_name: ingredient + ingredients: + type: taxonomy + input_field: ingredients_tags + taxonomy_name: ingredients unique_scans_n: 
type: integer scans_n: @@ -116,9 +112,11 @@ indices: type: disabled additives_n: type: integer - allergens_tags: - type: keyword - taxonomy_name: allergen + allergens: + type: taxonomy + input_field: allergens_tags + taxonomy_name: allergens + bucket_agg: true ecoscore_data: type: disabled ecoscore_score: @@ -163,74 +161,69 @@ indices: accent_color: "#ff8714" taxonomy: sources: - - name: category + - name: categories url: https://static.openfoodfacts.org/data/taxonomies/categories.full.json - - name: label + - name: labels url: https://static.openfoodfacts.org/data/taxonomies/labels.full.json - - name: additive + - name: additives url: https://static.openfoodfacts.org/data/taxonomies/additives.full.json - - name: allergen + - name: allergens url: https://static.openfoodfacts.org/data/taxonomies/allergens.full.json - - name: amino_acid + - name: amino_acids url: https://static.openfoodfacts.org/data/taxonomies/amino_acids.full.json - - name: country + - name: countries url: https://static.openfoodfacts.org/data/taxonomies/countries.full.json - name: data_quality url: https://static.openfoodfacts.org/data/taxonomies/data_quality.full.json - - name: food_group + - name: food_groups url: https://static.openfoodfacts.org/data/taxonomies/food_groups.full.json - - name: improvement + - name: improvements url: https://static.openfoodfacts.org/data/taxonomies/improvements.full.json - - name: ingredient + - name: ingredients url: https://static.openfoodfacts.org/data/taxonomies/ingredients.full.json - name: ingredients_analysis url: https://static.openfoodfacts.org/data/taxonomies/ingredients_analysis.full.json - name: ingredients_processing url: https://static.openfoodfacts.org/data/taxonomies/ingredients_processing.full.json - - name: language + - name: languages url: https://static.openfoodfacts.org/data/taxonomies/languages.full.json - - name: mineral + - name: minerals url: https://static.openfoodfacts.org/data/taxonomies/minerals.full.json - name: misc url: 
https://static.openfoodfacts.org/data/taxonomies/misc.full.json - - name: nova_group + - name: nova_groups url: https://static.openfoodfacts.org/data/taxonomies/nova_groups.full.json - - name: nucleotide + - name: nucleotides url: https://static.openfoodfacts.org/data/taxonomies/nucleotides.full.json - - name: nutrient + - name: nutrients url: https://static.openfoodfacts.org/data/taxonomies/nutrients.full.json - - name: origin + - name: origins url: https://static.openfoodfacts.org/data/taxonomies/origins.full.json - - name: other_nutritional_substance + - name: other_nutritional_substances url: https://static.openfoodfacts.org/data/taxonomies/other_nutritional_substances.full.json - - name: packaging_material + - name: packaging_materials url: https://static.openfoodfacts.org/data/taxonomies/packaging_materials.full.json - name: packaging_recycling url: https://static.openfoodfacts.org/data/taxonomies/packaging_recycling.full.json - - name: packaging_shape + - name: packaging_shapes url: https://static.openfoodfacts.org/data/taxonomies/packaging_shapes.full.json - name: periods_after_opening url: https://static.openfoodfacts.org/data/taxonomies/periods_after_opening.full.json - name: preservation url: https://static.openfoodfacts.org/data/taxonomies/preservation.full.json - - name: state + - name: states url: https://static.openfoodfacts.org/data/taxonomies/states.full.json - - name: vitamin + - name: vitamins url: https://static.openfoodfacts.org/data/taxonomies/vitamins.full.json - - name: brand + - name: brands url: https://static.openfoodfacts.org/data/taxonomies/brands.full.json - exported_langs: - - en - - fr - - es - - de - - it - - nl index: number_of_replicas: 1 number_of_shards: 4 name: off_taxonomy supported_langs: + # a specific language to put the main language entry + - main - aa - ab - ae @@ -386,7 +379,6 @@ indices: - wa - wo - xh - - xx - yi - yo - zh diff --git a/docker-compose.yml b/docker-compose.yml index 4467b325..52f562e0 100644 --- 
a/docker-compose.yml +++ b/docker-compose.yml @@ -30,7 +30,6 @@ x-base-es-envs: &base-es-envs http.cors.allow-headers: X-Requested-With,X-Auth-Token,Content-Type,Content-Length,Authorization http.cors.allow-credentials: "true" - x-api-common: &api-common image: ghcr.io/openfoodfacts/search-a-licious/search_service_image:${TAG:-dev} restart: ${RESTART_POLICY:-always} @@ -52,6 +51,7 @@ x-api-common: &api-common - common_net volumes: - ./data:/opt/search/data + - es_synonyms:/opt/search/synonyms services: @@ -63,6 +63,7 @@ services: discovery.seed_hosts: es02 volumes: - esdata01:/usr/share/elasticsearch/data + - es_synonyms:/usr/share/elasticsearch/config/synonyms es02: <<: *base-es-service @@ -74,6 +75,7 @@ services: - es01 volumes: - esdata02:/usr/share/elasticsearch/data + - es_synonyms:/usr/share/elasticsearch/config/synonyms # elasticsearch browser elasticvue: @@ -125,6 +127,7 @@ services: volumes: esdata01: esdata02: + es_synonyms: networks: # this is the network shared with product opener diff --git a/docker/prod.yml b/docker/prod.yml index 1684011f..7406c5d7 100644 --- a/docker/prod.yml +++ b/docker/prod.yml @@ -9,6 +9,9 @@ volumes: esdata02: external: true name: ${COMPOSE_PROJECT_NAME}_esdata02 + es_synonyms: + external: true + name: ${COMPOSE_PROJECT_NAME}_es_synonyms networks: common_net: diff --git a/docs/users/explain-configuration.md b/docs/users/explain-configuration.md index 23bb158e..87af06c7 100644 --- a/docs/users/explain-configuration.md +++ b/docs/users/explain-configuration.md @@ -50,13 +50,64 @@ You have to plan in advance how you configure this. Think well about: * fields you want to search and how you want to search them -* which informations you need to display in search results +* which information you need to display in search results * what you need to sort on * which facets you want to display * which charts you need to build Changing this section will probably involve a full re-indexing of all your items. 
+Some typical configurations for fields: + +A tags field that as values that are searched as an exact value (aka keyword), eg. a tag: +```yaml +tags: + type: keyword +``` + +An ingredients field that is used for full text search when no field is specified: +```yaml +ingredients: + type: text + full_text_search: true +``` + +A field `product_name` that is used for full text search, but with multilingual support: +```yaml +product_name: + full_text_search: true + type: text_lang +``` + +A scans_n field is an integer field: +```yaml +scans_n: + type: integer +``` + +A `specific_warnings` field that is used for full text search, +but only when you specify the field: +```yaml +specific_warnings: + type: text +``` + +A field brands_tags that needs to be split in multiple values (according to `split_separator` option): +```yaml +brands_tags: + type: keyword + split: true +``` + +A field labels_tags, that is used for exact match but with support of a taxonomy, +and that can be used for faceting, and bar graph generation: +```yaml +labels_tags: + type: keyword + taxonomy_name: label + bucket_agg: true +``` + Read more in the [reference documentation](./ref-config/searchalicious-config-schema.html#fields). ## Document fetcher, pre-processors and post-processors diff --git a/docs/users/explain-taxonomies.md b/docs/users/explain-taxonomies.md new file mode 100644 index 00000000..9c90b80f --- /dev/null +++ b/docs/users/explain-taxonomies.md @@ -0,0 +1,65 @@ +# Explain taxonomies + +Taxonomies are a way to organize categorization of items. + +Normally, a taxonomy is about a specific field. +For each possible values, it defines translations in different languages, and also possible synonyms (in each language). +For each entry we have a canonical identifier. + +A taxonomy also organizes the entries within a direct acyclic graph (a hierarchy but with possibility of multiple parents, though always avoiding cycles). 
+For example it may help describe that a salmon is a marine fish as well as a freshwater fish, and an oily fish. + +It can be used to help users find items using a specific field, in their language, even if they use a synonym for it. + +## Listing taxonomies + +If you plan to use taxonomies, you should first list them, in the [taxonomy section of the configuration](./ref-config/searchalicious-config-schema.html#indices_additionalProperties_taxonomy). + +Taxonomies must come in a JSON format that can be downloaded at a particular URL. + +The data in the JSON must contain an object, where: +* each key corresponds to the id of the taxonomy entries +* the value is an Object, with the following fields (none are mandatory): + * `name`: an Object associating language code, + with the entry name in the language (useful for translating the entry) + * `synonyms`: an Object associating language code, + with an array of synonyms for this entry in this language + +## Taxonomy fields + +As you define your [fields in the configuration](./explain-configuration.md#fields), +you can specify that a field is a taxonomy field (`type: taxonomy`). + +In this case, you also have to provide the following fields: +* taxonomy_name: the name of the taxonomy (as defined in the configuration) + +* synonyms_search: if true, + this will add a full text subfield that will enable using synonyms and translations to match this term. + + +## Autocompletion with taxonomies + +When you import taxonomies, they can be used to provide autocompletion in multiple ways. + +The webcomponents can use them to add values to facets, +or to provide suggestions in the search bar. + +You can also use the [autocompletion API](../ref-openapi/#operation/taxonomy_autocomplete_autocomplete_get) + +## Importing taxonomies + +If you defined taxonomies, +you must import them using the [import-taxonomies command](../devs/ref-python/cli.html#python3-m-app-import-taxonomies).
+ + +## Technical details on taxonomy fields + +A taxonomy field is stored in Elasticsearch as an object. +For each language it has a specific field, but in this field we just store the taxonomy entry id (eg. for organic, we always store `en:organic`). The analyzer is essentially set to `keyword` which means it won't be tokenized (but it is not completely true, as we also transform hyphen to underscore). + +Note that the value of this field must be considered a unique token by elasticsearch standard tokenizer. +So you should only use letters, numbers, colons and the underscore. +As an exception, we allow the hyphen character, transforming it to "_" before tokenization. + +But those fields have a specific *search analyzer*, so that when you enter a search query, +the query text is tokenized using standard analyzer, then lower cased, and we then look for synonyms in the taxonomy. \ No newline at end of file diff --git a/docs/users/ref-web-components.md b/docs/users/ref-web-components.md index 00669f8a..4afe261e 100644 --- a/docs/users/ref-web-components.md +++ b/docs/users/ref-web-components.md @@ -4,6 +4,8 @@ This page documents [web Components](https://developer.mozilla.org/en-US/docs/We provided by Search-a-licious to quickly build your interfaces. +See the [tutorial for an introduction](./tutorial.md#building-a-search-interface) + ## Customization ### Styling @@ -16,7 +18,6 @@ We only translated basic messages and most labels can generally be overridden us If you however needs to override current translations, you might clone this project, change translations in xliff files and regenerate the bundle. - ## Main components Those are the components you will certainly use to build your interface.
  • - +
  • @@ -367,8 +367,8 @@ - - + + diff --git a/frontend/src/mixins/search-ctl.ts b/frontend/src/mixins/search-ctl.ts index aa489f4c..437bc3ea 100644 --- a/frontend/src/mixins/search-ctl.ts +++ b/frontend/src/mixins/search-ctl.ts @@ -37,6 +37,7 @@ import {isTheSameSearchName} from '../utils/search'; export interface SearchParameters extends SortParameters { q: string; + boost_phrase: Boolean; langs: string[]; page_size: string; page?: string; @@ -86,6 +87,9 @@ export const SearchaliciousSearchMixin = >( /** * The name of this search + * + * It enables having multiple search on the same page, + * if you specify it, your components must specify the attribute search-name */ @property() override name = DEFAULT_SEARCH_NAME; @@ -97,17 +101,31 @@ export const SearchaliciousSearchMixin = >( baseUrl = '/'; /** - * Separated list of languages + * Separated list of languages, + * the first one is the main language */ @property() langs = 'en'; /** * index to query + * + * If not specified, the default index will be used */ @property() index?: string; + /** + * Wether to use the boost phrase heuristic. + * + * This heuristic is used to boost nearby term in search results. + * It can greatly improve the pertinence of the search results (only for default sort) + * + * It defaults to false. + */ + @property({type: Boolean, attribute: 'boost-phrase'}) + boostPhrase = false; + /** * Number of result per page */ @@ -359,11 +377,15 @@ export const SearchaliciousSearchMixin = >( _paramsToQueryStr(params: SearchParameters): string { return Object.entries(params) .map(([key, value]) => { + if (value === false) { + return null; + } if (value.constructor === Array) { value = value.join(API_LIST_DIVIDER); } return `${encodeURIComponent(key)}=${encodeURIComponent(value!)}`; }) + .filter((val) => val !== null) .sort() // for perdictability in tests ! 
.join('&'); } @@ -463,6 +485,7 @@ export const SearchaliciousSearchMixin = >( } const params: SearchParameters = { q: queryParts.join(' '), + boost_phrase: this.boostPhrase, langs: this.langs .split(PROPERTY_LIST_DIVIDER) .map((lang) => lang.trim()), diff --git a/frontend/src/search-chart.ts b/frontend/src/search-chart.ts index 8b2b68c9..791f77cc 100644 --- a/frontend/src/search-chart.ts +++ b/frontend/src/search-chart.ts @@ -142,7 +142,7 @@ export class SearchaliciousDistributionChart extends SearchaliciousChart { if (isGetRequest) return this.field; else return { - chart_type: 'DistributionChartType', + chart_type: 'DistributionChart', field: this.field, }; } @@ -172,7 +172,7 @@ export class SearchaliciousScatterChart extends SearchaliciousChart { if (isGetRequest) return `${this.x}:${this.y}`; else return { - chart_type: 'ScatterChartType', + chart_type: 'ScatterChart', x: this.x, y: this.y, }; diff --git a/frontend/src/test/search-bar_test.ts b/frontend/src/test/search-bar_test.ts index ca128917..5a2b598a 100644 --- a/frontend/src/test/search-bar_test.ts +++ b/frontend/src/test/search-bar_test.ts @@ -78,14 +78,13 @@ suite('searchalicious-bar', () => { ); const input = el.shadowRoot!.querySelector('input'); input!.value = 'test'; - console.log(input); - console.log('input', input!.value); input!.dispatchEvent(new Event('input')); const bar = el as SearchaliciousBar; const searchParams = (bar as any)['_searchUrl'](); - console.log('input', input); assert.equal(searchParams.searchUrl, '/search'); + console.log(searchParams.params); assert.deepEqual(searchParams.params, { + boost_phrase: false, index_id: 'foo', langs: ['en'], page_size: '10', @@ -96,4 +95,28 @@ suite('searchalicious-bar', () => { 'index_id=foo&langs=en&page_size=10&q=test' ); }); + + test('_searchUrl computation boost phrase', async () => { + const el = await fixture( + html` ` + ); + const input = el.shadowRoot!.querySelector('input'); + input!.value = 'test'; + input!.dispatchEvent(new 
Event('input')); + const bar = el as SearchaliciousBar; + const searchParams = (bar as any)['_searchUrl'](); + assert.equal(searchParams.searchUrl, '/search'); + assert.deepEqual(searchParams.params, { + boost_phrase: true, + index_id: 'foo', + langs: ['en'], + page_size: '10', + q: 'test', + }); + // not present in search query string + assert.equal( + (bar as any)._paramsToQueryStr(searchParams.params), + 'boost_phrase=true&index_id=foo&langs=en&page_size=10&q=test' + ); + }); }); diff --git a/poetry.lock b/poetry.lock index 40366ad4..ea9b6b30 100644 --- a/poetry.lock +++ b/poetry.lock @@ -330,24 +330,58 @@ python-dateutil = "*" [package.extras] develop = ["coverage", "pytest", "pytest-cov", "pytest-mock", "pytz", "sphinx (>2)", "sphinx-rtd-theme (>0.5)"] +[[package]] +name = "factory-boy" +version = "3.3.1" +description = "A versatile test fixtures replacement based on thoughtbot's factory_bot for Ruby." +optional = false +python-versions = ">=3.8" +files = [ + {file = "factory_boy-3.3.1-py2.py3-none-any.whl", hash = "sha256:7b1113c49736e1e9995bc2a18f4dbf2c52cf0f841103517010b1d825712ce3ca"}, + {file = "factory_boy-3.3.1.tar.gz", hash = "sha256:8317aa5289cdfc45f9cae570feb07a6177316c82e34d14df3c2e1f22f26abef0"}, +] + +[package.dependencies] +Faker = ">=0.7.0" + +[package.extras] +dev = ["Django", "Pillow", "SQLAlchemy", "coverage", "flake8", "isort", "mongoengine", "mongomock", "mypy", "tox", "wheel (>=0.32.0)", "zest.releaser[recommended]"] +doc = ["Sphinx", "sphinx-rtd-theme", "sphinxcontrib-spelling"] + +[[package]] +name = "faker" +version = "30.4.0" +description = "Faker is a Python package that generates fake data for you." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "Faker-30.4.0-py3-none-any.whl", hash = "sha256:b6c2d61861dcf1084b8e10959418fe3380a1a3dcd2796a73d43f738a42aabb4c"}, + {file = "faker-30.4.0.tar.gz", hash = "sha256:6fd328db7195e70cdee479ee687fef6623c9b57b8023c582adbe88a01dc54297"}, +] + +[package.dependencies] +python-dateutil = ">=2.4" +typing-extensions = "*" + [[package]] name = "fastapi" -version = "0.109.2" +version = "0.115.2" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = false python-versions = ">=3.8" files = [ - {file = "fastapi-0.109.2-py3-none-any.whl", hash = "sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d"}, - {file = "fastapi-0.109.2.tar.gz", hash = "sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73"}, + {file = "fastapi-0.115.2-py3-none-any.whl", hash = "sha256:61704c71286579cc5a598763905928f24ee98bfcc07aabe84cfefb98812bbc86"}, + {file = "fastapi-0.115.2.tar.gz", hash = "sha256:3995739e0b09fa12f984bce8fa9ae197b35d433750d3d312422d846e283697ee"}, ] [package.dependencies] pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" -starlette = ">=0.36.3,<0.37.0" +starlette = ">=0.37.2,<0.41.0" typing-extensions = ">=4.8.0" [package.extras] -all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson 
(>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "jinja2 (>=2.11.2)", "python-multipart (>=0.0.7)", "uvicorn[standard] (>=0.12.0)"] [[package]] name = "filelock" @@ -376,6 +410,52 @@ files = [ {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, ] +[[package]] +name = "httpcore" +version = "1.0.6" +description = "A minimal low-level HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"}, + {file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<1.0)"] + +[[package]] +name = "httpx" +version = "0.27.2" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, + {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +zstd = ["zstandard (>=0.18.0)"] + [[package]] name = "identify" version = "2.5.36" @@ -1084,13 +1164,13 @@ files = [ [[package]] name = "starlette" -version = "0.36.3" +version = "0.40.0" description = "The little ASGI library that shines." 
optional = false python-versions = ">=3.8" files = [ - {file = "starlette-0.36.3-py3-none-any.whl", hash = "sha256:13d429aa93a61dc40bf503e8c801db1f1bca3dc706b10ef2434a36123568f044"}, - {file = "starlette-0.36.3.tar.gz", hash = "sha256:90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080"}, + {file = "starlette-0.40.0-py3-none-any.whl", hash = "sha256:c494a22fae73805376ea6bf88439783ecfba9aac88a43911b48c653437e784c4"}, + {file = "starlette-0.40.0.tar.gz", hash = "sha256:1a3139688fb298ce5e2d661d37046a66ad996ce94be4d4983be019a23a04ea35"}, ] [package.dependencies] @@ -1245,4 +1325,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "870fcc4a77cb0e535ba21a29b6c308c4423f37ce39789bcd09a60ef71b540234" +content-hash = "176677a049ac909a6959ad0e188ca376959ccd1449db6072f9377d835390749a" diff --git a/pyproject.toml b/pyproject.toml index 4820887b..17ea0f1d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,11 +5,12 @@ description = "" authors = ["Open Food Facts team"] license = "AGPL-3.0 licence" readme = "README.md" +package-mode = false [tool.poetry.dependencies] python = "^3.11" elasticsearch-dsl = "~8.9.0" -fastapi = "~0.109.1" +fastapi = "~0.115.0" requests = "~2.32.2" redis = "~5.0.0" uvicorn = "~0.23.2" @@ -34,6 +35,8 @@ types-cachetools = "^5.3.0.7" types-requests = "^2.31.0.10" types-pyyaml = "^6.0.12.12" pre-commit = "^3.5.0" +factory-boy = "^3.3.1" +httpx = "^0.27.1" [build-system] requires = ["poetry-core"] diff --git a/scripts/Dockerfile.schema b/scripts/Dockerfile.schema index 5e8fce33..d3b9ae35 100644 --- a/scripts/Dockerfile.schema +++ b/scripts/Dockerfile.schema @@ -1,4 +1,4 @@ -FROM python:3-slim +FROM python:3.12-slim ARG USER_UID=1000 ARG USER_GID=1000 diff --git a/tests/cli_utils.py b/tests/cli_utils.py new file mode 100644 index 00000000..d30e99c6 --- /dev/null +++ b/tests/cli_utils.py @@ -0,0 +1,24 @@ +import sys +import traceback + 
+from typer.testing import CliRunner + +from app.cli.main import cli + +runner = CliRunner(mix_stderr=False) + + +def add_cli_exc_info(result): + """Print exception info if there was an error during a CliRunner test + + This is useful to quickly grab the problem + """ + if result.exit_code != 0: + print("".join(traceback.format_exception(*result.exc_info)), file=sys.stderr) + + +def runner_invoke(*args): + """Run a CLI command and print exception info if there was an error""" + result = runner.invoke(cli, args) + add_cli_exc_info(result) + return result diff --git a/tests/conftest.py b/tests/conftest.py index 613e41b2..448f1b42 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,34 @@ import pytest +from fastapi.testclient import TestClient def pytest_addoption(parser): - """Add an option to update test result JSON files.""" + """Add an option to update test result JSON files, + and another to clean Elasticsearch indexes + """ parser.addoption("--update-results", action="store_true", default=False) + parser.addoption("--clean-es", action="store_true", default=False) -@pytest.fixture +@pytest.fixture(scope="session") def update_results(request): + """fixture to get the value of the --update-results flag + + It helps decide whether test should compare results to strode one + or update them + """ return request.config.getoption("--update-results") + + +@pytest.fixture(scope="session") +def clean_es(request): + """fixture to get the value of the --clean-es flag""" + return request.config.getoption("--clean-es") + + +@pytest.fixture +def test_client(): + """Provide a test client on the API""" + from app.api import app + + return TestClient(app) diff --git a/tests/int/__init__.py b/tests/int/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/int/conftest.py b/tests/int/conftest.py new file mode 100644 index 00000000..f32f4163 --- /dev/null +++ b/tests/int/conftest.py @@ -0,0 +1,87 @@ +import pathlib +import time + +import 
elasticsearch +import pytest + +import app.config +import app.utils.connection + +from .data_generation import ( + delete_es_indices, + ingest_data, + ingest_taxonomies, + load_state, + save_state, +) + +DATA_DIR = pathlib.Path(__file__).parent / "data" +DEFAULT_CONFIG_PATH = DATA_DIR / "test_off.yml" + + +@pytest.fixture(scope="session") +def test_off_config(): + """Fixture that sets default config to DEFAULT_CONFIG_PATH""" + return app.config.set_global_config(DEFAULT_CONFIG_PATH) + + +@pytest.fixture(scope="session") +def index_config(test_off_config): + """Fixture that returns the IndexConfig corresponding to test_off.""" + return test_off_config.get_index_config("test_off")[1] + + +ES_MAX_WAIT = 60 # 1 minute + + +@pytest.fixture(scope="session") +def es_connection(test_off_config): + """Fixture that get's an Elasticsearch connection""" + es = None + waited = 0 + while es is None: + try: + es = app.utils.connection.get_es_client() + health = es.cluster.health() + if health.get("status") != "green": + raise elasticsearch.exceptions.ConnectionError( + "Elasticsearch not ready" + ) + return es + except elasticsearch.exceptions.ConnectionError: + waited += 1 + if waited > ES_MAX_WAIT: + raise + time.sleep(1) + es = None + + +@pytest.fixture +def synonyms_created(index_config, es_connection): + """A feature to ensure synonyms file are created""" + ingest_taxonomies("test_off", index_config, es_connection) + + +@pytest.fixture(scope="session") +def data_ingester(index_config, es_connection, clean_es): + """Return a feature to ingest data + + It will cleanup ES if requested by --clean-es option. + + It will ingest taxonomies and data, if needed, or use previous created indexes. 
+ """ + if clean_es: + delete_es_indices(es_connection) + else: + load_state("test_off", index_config, es_connection) + + def _ingester(data, read_only=True): + """The implementation of data ingestion + + if you test modifies the index, you should set read_only=False + """ + ingest_taxonomies("test_off", index_config, es_connection) + ingest_data(data, "test_off", index_config, es_connection, read_only=read_only) + save_state("test_off", index_config, es_connection) + + return _ingester diff --git a/tests/int/data/test_categories.full.json b/tests/int/data/test_categories.full.json new file mode 100644 index 00000000..e6bd47ee --- /dev/null +++ b/tests/int/data/test_categories.full.json @@ -0,0 +1 @@ +{"en:dried-plant-based-foods": {"children": ["en:dehydrated-vegetable-bouillons", "en:dried-aromatic-plants", "en:dried-fig-cakes", "en:dried-fruits", "en:dried-mushrooms", "en:dried-seaweeds", "en:dried-vegetables", "en:ground-dried-aromatic-plants", "en:ground-dried-seaweeds", "en:ground-dried-vegetables", "fr:melons-deshydrates"], "synonyms": {"fi": ["kuivatut kasvipohjaiset tuotteet"], "fr": ["Aliments \u00e0 base de plantes s\u00e9ch\u00e9es", "Aliments secs \u00e0 base de plantes"], "de": ["Getrocknete Produkte auf pflanzlicher Basis"], "hu": ["Sz\u00e1r\u00edtott n\u00f6v\u00e9nyi alap\u00fa \u00e9telek"], "he": ["\u05de\u05d6\u05d5\u05df \u05de\u05d9\u05d5\u05d1\u05e9 \u05de\u05df \u05d4\u05e6\u05d5\u05de\u05d7"], "nl": ["Producten op basis van gedroogde planten"], "ru": ["\u0421\u0443\u0448\u0435\u043d\u044b\u0435 \u0431\u043b\u044e\u0434\u0430 \u0438\u0437 \u0440\u0430\u0441\u0442\u0435\u043d\u0438\u0439"], "pl": ["Suszone produkty pochodzenia ro\u015blinnego"], "pt": ["Alimentos \u00e0 base de plantas secos"], "en": ["Dried plant-based foods"], "ca": ["Aliments secs amb base vegetal"], "bg": ["\u0421\u0443\u0448\u0435\u043d\u0438 \u0440\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u043d\u0438 \u0445\u0440\u0430\u043d\u0438"], "es": ["Alimentos de origen 
vegetal deshidratados", "Alimentos de origen vegetal secos", "Alimentos de origen vegetal desecados", "Vegetales deshidratados", "Vegetales desecados"], "hr": ["Su\u0161ena hrana biljnog podrijetla"], "it": ["Cibi disidratati a base di vegetali"]}, "name": {"ru": "\u0421\u0443\u0448\u0435\u043d\u044b\u0435 \u0431\u043b\u044e\u0434\u0430 \u0438\u0437 \u0440\u0430\u0441\u0442\u0435\u043d\u0438\u0439", "pl": "Suszone produkty pochodzenia ro\u015blinnego", "pt": "Alimentos \u00e0 base de plantas secos", "en": "Dried plant-based foods", "ca": "Aliments secs amb base vegetal", "bg": "\u0421\u0443\u0448\u0435\u043d\u0438 \u0440\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u043d\u0438 \u0445\u0440\u0430\u043d\u0438", "es": "Alimentos de origen vegetal deshidratados", "it": "Cibi disidratati a base di vegetali", "hr": "Su\u0161ena hrana biljnog podrijetla", "fi": "Kuivatut kasvipohjaiset tuotteet", "fr": "Aliments \u00e0 base de plantes s\u00e9ch\u00e9es", "de": "Getrocknete Produkte auf pflanzlicher Basis", "hu": "Sz\u00e1r\u00edtott n\u00f6v\u00e9nyi alap\u00fa \u00e9telek", "he": "\u05de\u05d6\u05d5\u05df \u05de\u05d9\u05d5\u05d1\u05e9 \u05de\u05df \u05d4\u05e6\u05d5\u05de\u05d7", "nl": "Producten op basis van gedroogde planten"}, "parents": ["en:dried-products", "en:plant-based-foods"]}, "en:syrups": {"gpc_category_description": {"en": "Definition: Includes any products that can be described/observed as a sweet thick liquid that is a derivative of the sugar crystallization process, such as molasses -the residue left after the first stage-, treacle -the residue from the second stage that is sweeter and less viscous than molasses-, syrup -prepared from molasses, glucose and often flavourings or a solution of sugar which may be from a variety of sources, such as maple-. Products include corn syrups, fruit syrups, golden syrups, maple syrups and molasses. 
Definition Excludes: Excludes products such as Sugar and Sugar Substitutes."}, "name": {"cs": "Sirupy", "de": "Sirups", "fr": "Sirops", "ro": "Sirop", "fi": "Siirapit", "nl": "Siropen", "he": "\u05e1\u05d9\u05e8\u05d5\u05e4", "hu": "Szirupok", "pt": "Xaropes", "pl": "Syrop", "ru": "\u0421\u0438\u0440\u043e\u043f\u044b", "it": "Sciroppi", "hr": "Sirup", "es": "Siropes", "ja": "\u30b7\u30ed\u30c3\u30d7", "bg": "\u0421\u0438\u0440\u043e\u043f", "th": "\u0e19\u0e49\u0e33\u0e40\u0e0a\u0e37\u0e48\u0e2d\u0e21", "ca": "Alm\u00edvars", "en": "Syrups", "zh": "\u7cd6\u6d46"}, "gpc_category_name": {"en": "Syrup/Treacle/Molasses -Shelf Stable-"}, "wikidata": {"en": "Q6584340"}, "gpc_category_code": {"en": "10000044"}, "synonyms": {"ru": ["\u0421\u0438\u0440\u043e\u043f\u044b"], "pt": ["Xaropes"], "pl": ["Syrop", "syropy"], "ca": ["Alm\u00edvars", "xarops"], "zh": ["\u7cd6\u6d46"], "en": ["Syrups"], "it": ["Sciroppi", "Sciroppo"], "hr": ["sirup"], "th": ["\u0e19\u0e49\u0e33\u0e40\u0e0a\u0e37\u0e48\u0e2d\u0e21"], "bg": ["\u0421\u0438\u0440\u043e\u043f"], "ja": ["\u30b7\u30ed\u30c3\u30d7"], "es": ["Siropes", "Jarabes"], "fr": ["Sirops", "sirop"], "fi": ["siirapit"], "ro": ["Sirop"], "cs": ["Sirupy"], "de": ["Sirups", "Sirup"], "he": ["\u05e1\u05d9\u05e8\u05d5\u05e4"], "hu": ["Szirupok"], "nl": ["Siropen"]}, "children": ["en:concentrate-syrup-without-sugar-and-with-artificial-sweeteners", "en:flavoured-syrups", "en:mint-flavoured-syrup-with-sugar-diluted-in-water", "en:simple-syrups", "en:syrup-with-sugar-to-be-diluted", "en:thick-syrups"]}, "en:tomatoes-and-their-products": {"synonyms": {"la": ["Solanum lycopersicum", "Lycopersicon lycopersicum", "Lycopersicon esculentum"], "he": ["\u05e2\u05d2\u05d1\u05e0\u05d9\u05d5\u05ea \u05d5\u05de\u05d5\u05e6\u05e8\u05d9\u05d4\u05df"], "hu": ["Paradicsom \u00e9s pradicsom alap\u00fa term\u00e9kek"], "nl": ["Tomaten en afgeleide producten"], "fr": ["Tomates et d\u00e9riv\u00e9s"], "fi": ["tomaatit ja tomaattituotteet"], "de": ["Tomaten und 
Tomatenprodukte"], "ca": ["Tom\u00e0quets i productes de tom\u00e0quet", "Tom\u00e0quets i productes derivats"], "en": ["Tomatoes and their products", "Tomatoes and tomato products"], "hr": ["Raj\u010dice i njihovih proizvoda"], "it": ["Pomodori e prodotti derivati"], "bg": ["\u0414\u043e\u043c\u0430\u0442\u0438 \u0438 \u0434\u043e\u043c\u0430\u0442\u0435\u043d\u0438 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u0438"], "es": ["Tomates y sus productos", "Tomates y derivados"], "ru": ["\u041f\u043e\u043c\u0438\u0434\u043e\u0440\u044b \u0438 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u044b \u0438\u0437 \u043f\u043e\u043c\u0438\u0434\u043e\u0440"], "pt": ["Tomates e derivados"], "pl": ["Pomidory i produkty z pomidor\u00f3w"]}, "children": ["en:boiled-tomato-pulp-and-peel", "en:cream-of-tomato-soups", "en:double-concentrate-tomato-paste", "en:dried-tomato-powder", "en:dried-tomatoes", "en:natural-grated-tomato", "en:roasted-tomato-with-skin", "en:strained-tomatoes", "en:stuffed-tomatoes", "en:tomato-chutneys", "en:tomato-jams", "en:tomato-juices", "en:tomato-pastes", "en:tomato-pulps", "en:tomato-purees", "en:tomatoes"], "wikidata": {"en": "Q23501"}, "name": {"nl": "Tomaten en afgeleide producten", "hu": "Paradicsom \u00e9s pradicsom alap\u00fa term\u00e9kek", "la": "Solanum lycopersicum", "he": "\u05e2\u05d2\u05d1\u05e0\u05d9\u05d5\u05ea \u05d5\u05de\u05d5\u05e6\u05e8\u05d9\u05d4\u05df", "de": "Tomaten und Tomatenprodukte", "fi": "Tomaatit ja tomaattituotteet", "fr": "Tomates et d\u00e9riv\u00e9s", "bg": "\u0414\u043e\u043c\u0430\u0442\u0438 \u0438 \u0434\u043e\u043c\u0430\u0442\u0435\u043d\u0438 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u0438", "es": "Tomates y sus productos", "hr": "Raj\u010dice i njihovih proizvoda", "it": "Pomodori e prodotti derivati", "en": "Tomatoes and their products", "ca": "Tom\u00e0quets i productes de tom\u00e0quet", "pl": "Pomidory i produkty z pomidor\u00f3w", "pt": "Tomates e derivados", "ru": 
"\u041f\u043e\u043c\u0438\u0434\u043e\u0440\u044b \u0438 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u044b \u0438\u0437 \u043f\u043e\u043c\u0438\u0434\u043e\u0440"}, "parents": ["en:vegetables-based-foods"]}, "en:biscuits-and-cakes": {"food_groups": {"en": "en:biscuits-and-cakes"}, "name": {"en": "Biscuits and cakes", "zh": "\u997c\u5e72\u548c\u86cb\u7cd5", "ca": "Galetes i pastissos", "sv": "Kakor och t\u00e5rtor", "bg": "\u0411\u0438\u0441\u043a\u0432\u0438\u0442\u0438 \u0438 \u0441\u043b\u0430\u0434\u043a\u0438\u0448\u0438", "es": "Galletas y pasteles", "th": "\u0e02\u0e19\u0e21\u0e1b\u0e31\u0e07\u0e01\u0e23\u0e2d\u0e1a\u0e41\u0e25\u0e30\u0e40\u0e04\u0e4a\u0e01", "it": "Biscotti e torte", "hr": "Keksi i kola\u010di", "ru": "\u041f\u0438\u0440\u043e\u0433\u0438 \u0438 \u043f\u0435\u0447\u0435\u043d\u044c\u0435", "no": "Kjeks og kaker", "lt": "Sausainiai ir pyragai", "pt": "Biscoitos e Bolos", "da": "Kiks og kage", "hu": "Kekszek \u00e9s s\u00fctem\u00e9nyek", "he": "\u05e2\u05d5\u05d2\u05d9\u05d5\u05ea \u05d5\u05e2\u05d5\u05d2\u05d5\u05ea", "tr": "Bisk\u00fcviler ve kekler", "nl": "Koekjes en cakes", "fi": "Keksit ja kakut", "ro": "Biscui\u021bi \u0219i pr\u0103jituri", "fr": "Biscuits et g\u00e2teaux", "de": "Kekse und Kuchen"}, "description": {"en": "A biscuit is a flour-based baked food product, which is typically hard, flat and unleavened."}, "children": ["en:biscuit-with-chocolate-covering", "en:biscuits", "en:brookies", "en:cake-with-almond-flavoured-filling", "en:cakes", "en:coated-chocolate-bar-without-biscuit", "en:cone-wafer-for-ice-cream", "en:diet-crispy-biscuit", "en:frozen-cakes-and-pastries", "en:fruit-biscuits", "en:gingerbreads", "en:jaffa-cakes", "en:pastries", "en:reduced-fat-biscuit-bar-filled-with-fruits", "en:savoy-style-sponge-cake", "en:snack-biscuit-with-fruits-filling", "en:thin-biscuits-with-fruits", "fr:biscuits-fourres-aux-fruits"], "synonyms": {"he": ["\u05e2\u05d5\u05d2\u05d9\u05d5\u05ea \u05d5\u05e2\u05d5\u05d2\u05d5\u05ea"], "hu": 
["Kekszek \u00e9s s\u00fctem\u00e9nyek"], "nl": ["Koekjes en cakes"], "tr": ["Bisk\u00fcviler ve kekler"], "fr": ["Biscuits et g\u00e2teaux"], "ro": ["Biscui\u021bi \u0219i pr\u0103jituri"], "fi": ["Keksit ja kakut"], "de": ["Kekse und Kuchen"], "ca": ["Galetes i pastissos"], "sv": ["Kakor och t\u00e5rtor"], "en": ["Biscuits and cakes"], "zh": ["\u997c\u5e72\u548c\u86cb\u7cd5"], "it": ["Biscotti e torte", "Biscotti e dolci"], "hr": ["Keksi i kola\u010di"], "es": ["Galletas y pasteles"], "bg": ["\u0411\u0438\u0441\u043a\u0432\u0438\u0442\u0438 \u0438 \u0441\u043b\u0430\u0434\u043a\u0438\u0448\u0438"], "th": ["\u0e02\u0e19\u0e21\u0e1b\u0e31\u0e07\u0e01\u0e23\u0e2d\u0e1a\u0e41\u0e25\u0e30\u0e40\u0e04\u0e4a\u0e01"], "no": ["Kjeks og kaker"], "ru": ["\u041f\u0438\u0440\u043e\u0433\u0438 \u0438 \u043f\u0435\u0447\u0435\u043d\u044c\u0435"], "lt": ["Sausainiai ir pyragai", "Sausainiai ir tortai"], "pt": ["Biscoitos e Bolos"], "da": ["Kiks og kage"]}, "parents": ["en:sweet-snacks"], "pnns_group_2": {"en": "Biscuits and cakes"}}, "en:fruits-and-vegetables-based-foods": {"parents": ["en:plant-based-foods"], "name": {"fr": "Aliments \u00e0 base de fruits et de l\u00e9gumes", "fi": "Hedelm\u00e4- ja vihannespohjaiset ruoat", "de": "Frucht- und gem\u00fcsebasierte Lebensmittel", "hu": "Gy\u00fcm\u00f6lcs\u00f6k \u00e9s z\u00f6lds\u00e9gek alap\u00fa \u00e9lelmiszerek", "no": "Frukt- og gr\u00f8nnsaksbaserte matvarer", "pt": "Alimentos \u00e0 base de frutas e legumes", "lt": "Vaisi\u0173 ir dar\u017eovi\u0173 maistas", "da": "Frugt- og gr\u00f8ntsagsbasere f\u00f8devarer", "sv": "Matvaror baserade p\u00e5 frukt och gr\u00f6nsaker", "zh": "\u4ee5\u6c34\u679c\u548c\u852c\u83dc\u4e3a\u57fa\u7840\u7684\u98df\u54c1", "hr": "Hrana na bazi vo\u0107a i povr\u0107a", "ja": "\u679c\u7269\u3068\u91ce\u83dc\u3092\u30d9\u30fc\u30b9\u306b\u3057\u305f\u98df\u54c1", "es": "Frutas y verduras y sus productos", "th": 
"\u0e1c\u0e25\u0e34\u0e15\u0e20\u0e31\u0e13\u0e11\u0e4c\u0e08\u0e32\u0e01\u0e1c\u0e31\u0e01\u0e41\u0e25\u0e30\u0e1c\u0e25\u0e44\u0e21\u0e49", "he": "\u05de\u05d6\u05d5\u05df \u05de\u05d1\u05d5\u05e1\u05e1 \u05e4\u05d9\u05e8\u05d5\u05ea \u05d5\u05d9\u05e8\u05e7\u05d5\u05ea", "nl": "Voedsel op basis van fruit en groenten", "tr": "Meyve ve sebze kaynakl\u0131 g\u0131dalar", "ru": "\u041f\u0440\u043e\u0434\u0443\u043a\u0442\u044b \u0438\u0437 \u0444\u0440\u0443\u043a\u0442\u043e\u0432 \u0438\u043b\u0438 \u043e\u0432\u043e\u0449\u0435\u0439", "ko": "\uacfc\uc77c\uacfc \ucc44\uc18c \uae30\ubc18 \uc2dd\ud488", "pl": "Produkty na bazie warzyw i owoc\u00f3w", "ca": "Aliments amb base de fruites i verdures", "en": "Fruits and vegetables based foods", "it": "Cibi a base di frutta e verdura"}, "pnns_group_1": {"en": "Fruits and vegetables"}, "food_groups": {"en": "en:fruits-and-vegetables"}, "children": ["en:fruits-based-foods", "en:fruits-vegetables-nuts-seeds-variety-packs", "en:vegetable-soups", "en:vegetables-based-foods"], "synonyms": {"fi": ["Hedelm\u00e4- ja vihannespohjaiset ruoat"], "fr": ["Aliments \u00e0 base de fruits et de l\u00e9gumes"], "de": ["Frucht- und gem\u00fcsebasierte Lebensmittel"], "hu": ["Gy\u00fcm\u00f6lcs\u00f6k \u00e9s z\u00f6lds\u00e9gek alap\u00fa \u00e9lelmiszerek"], "no": ["Frukt- og gr\u00f8nnsaksbaserte matvarer"], "pt": ["Alimentos \u00e0 base de frutas e legumes"], "lt": ["Vaisi\u0173 ir dar\u017eovi\u0173 maistas"], "da": ["Frugt- og gr\u00f8ntsagsbasere f\u00f8devarer"], "zh": ["\u4ee5\u6c34\u679c\u548c\u852c\u83dc\u4e3a\u57fa\u7840\u7684\u98df\u54c1"], "sv": ["Matvaror baserade p\u00e5 frukt och gr\u00f6nsaker"], "ja": ["\u679c\u7269\u3068\u91ce\u83dc\u3092\u30d9\u30fc\u30b9\u306b\u3057\u305f\u98df\u54c1"], "es": ["Frutas y verduras y sus productos"], "th": ["\u0e1c\u0e25\u0e34\u0e15\u0e20\u0e31\u0e13\u0e11\u0e4c\u0e08\u0e32\u0e01\u0e1c\u0e31\u0e01\u0e41\u0e25\u0e30\u0e1c\u0e25\u0e44\u0e21\u0e49"], "hr": ["Hrana na bazi vo\u0107a i 
povr\u0107a"], "he": ["\u05de\u05d6\u05d5\u05df \u05de\u05d1\u05d5\u05e1\u05e1 \u05e4\u05d9\u05e8\u05d5\u05ea \u05d5\u05d9\u05e8\u05e7\u05d5\u05ea"], "tr": ["Meyve ve sebze kaynakl\u0131 g\u0131dalar", "Meyve ve sebze kaynakl\u0131 yiyecekler", "Meyve ve sebze kaynakl\u0131 besinler"], "nl": ["Voedsel op basis van fruit en groenten"], "ru": ["\u041f\u0440\u043e\u0434\u0443\u043a\u0442\u044b \u0438\u0437 \u0444\u0440\u0443\u043a\u0442\u043e\u0432 \u0438\u043b\u0438 \u043e\u0432\u043e\u0449\u0435\u0439"], "pl": ["Produkty na bazie warzyw i owoc\u00f3w"], "ko": ["\uacfc\uc77c\uacfc \ucc44\uc18c \uae30\ubc18 \uc2dd\ud488"], "en": ["Fruits and vegetables based foods"], "ca": ["Aliments amb base de fruites i verdures"], "it": ["Cibi a base di frutta e verdura"]}}, "en:shortbread-cookies": {"synonyms": {"en": ["Shortbread cookies", "Shortbread pastry biscuit"], "ca": ["Galetes de mantega"], "nl": ["Zandkoekjes", "Zandkokje"], "es": ["Galletas de mantequilla"], "ja": ["\u30b7\u30e7\u30fc\u30c8\u30d6\u30ec\u30c3\u30c9"], "hr": ["Keksi od prhkog tijesta"], "it": ["Frollini"], "fi": ["Murokeksit"], "fr": ["Biscuits sabl\u00e9s", "Biscuit sabl\u00e9", "Sabl\u00e9s", "Sabl\u00e9", "Sabl\u00e9 p\u00e2tissier"], "de": ["M\u00fcrbeteigkekse"]}, "children": ["en:fruit-shortbread-cake", "en:shortbread", "en:shortbread-biscuit-with-butter-and-chocolate", "en:shortbread-biscuit-with-chocolate", "en:shortbread-biscuit-with-fruits", "en:shortbread-cookie-with-apple", "en:shortbread-cookie-with-chocolate", "en:shortbread-cookie-with-cocoa", "en:shortbread-cookie-with-coconut", "en:shortbread-cookie-with-praline", "en:shortbread-cookie-with-red-berries", "en:shortbread-cookies-from-brittany", "fr:palets", "fr:sables-au-beurre"], "parents": ["en:biscuits"], "ciqual_food_code": {"en": "24071"}, "wikidata": {"en": "Q2915670"}, "ciqual_food_name": {"en": "Shortbread pastry biscuit", "fr": "Sabl\u00e9 p\u00e2tissier"}, "name": {"ca": "Galetes de mantega", "en": "Shortbread cookies", "it": 
"Frollini", "hr": "Keksi od prhkog tijesta", "es": "Galletas de mantequilla", "nl": "Zandkoekjes", "ja": "\u30b7\u30e7\u30fc\u30c8\u30d6\u30ec\u30c3\u30c9", "fr": "Biscuits sabl\u00e9s", "fi": "Murokeksit", "de": "M\u00fcrbeteigkekse"}, "agribalyse_food_code": {"en": "24071"}}, "en:dried-products": {"synonyms": {"de": ["Getrocknete Produkte"], "fi": ["kuivatut tuotteet"], "fr": ["Produits d\u00e9shydrat\u00e9s", "produits lyophilis\u00e9s", "produits s\u00e9ch\u00e9s", "aliments s\u00e9ch\u00e9s", "Aliments secs"], "nl": ["Gedroogde producten", "Gedroogd product"], "tr": ["Kurutulmu\u015f \u00fcr\u00fcnler"], "hu": ["Sz\u00e1r\u00edtott term\u00e9kek", "sz\u00e1r\u00edtott \u00e9lelmiszerek"], "he": ["\u05de\u05d5\u05e6\u05e8\u05d9\u05dd \u05de\u05d9\u05d5\u05d1\u05e9\u05d9\u05dd", "\u05de\u05d5\u05e6\u05e8\u05d9\u05dd \u05d9\u05d1\u05e9\u05d9\u05dd"], "pl": ["Produkty suszone", "produkty poddane dehydratacji", "\u017cywno\u015b\u0107 suszona"], "pt": ["Produtos secos", "produtos desidratados", "Alimentos secos", "Alimentos desidratados"], "da": ["T\u00f8rrede produkter", "Dehydrerede f\u00f8devarer"], "lt": ["D\u017eiovinti produktai", "dehidratuoti produktai", "d\u017eiovinti maisto produktai", "dehidratuoti maisto produktai"], "ru": ["\u041e\u0431\u0435\u0437\u0432\u043e\u0436\u0435\u043d\u043d\u044b\u0435 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u044b"], "th": ["\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e41\u0e2b\u0e49\u0e07"], "bg": ["\u0421\u0443\u0448\u0435\u043d\u0438 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u0438"], "ja": ["\u4e7e\u7269"], "es": ["Productos deshidratados", "Productors deshidratados", "alimentos deshidratados", "Alimentos secos", "Alimentos desecados"], "it": ["Prodotti disidratati"], "hr": ["Su\u0161eni proizvodi"], "en": ["Dried products", "dehydrated products", "Dried foods", "Dehydrated foods"], "ca": ["Productes secs", "deshidratats"]}, "children": ["en:baby-milks-in-powder", "en:dried-meals", "en:dried-plant-based-foods", 
"en:dried-products-to-be-rehydrated", "en:iced-teas-preparations"], "name": {"ca": "Productes secs", "en": "Dried products", "it": "Prodotti disidratati", "hr": "Su\u0161eni proizvodi", "th": "\u0e2d\u0e32\u0e2b\u0e32\u0e23\u0e41\u0e2b\u0e49\u0e07", "ja": "\u4e7e\u7269", "es": "Productos deshidratados", "bg": "\u0421\u0443\u0448\u0435\u043d\u0438 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u0438", "ru": "\u041e\u0431\u0435\u0437\u0432\u043e\u0436\u0435\u043d\u043d\u044b\u0435 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u044b", "pt": "Produtos secos", "lt": "D\u017eiovinti produktai", "da": "T\u00f8rrede produkter", "pl": "Produkty suszone", "he": "\u05de\u05d5\u05e6\u05e8\u05d9\u05dd \u05de\u05d9\u05d5\u05d1\u05e9\u05d9\u05dd", "hu": "Sz\u00e1r\u00edtott term\u00e9kek", "tr": "Kurutulmu\u015f \u00fcr\u00fcnler", "nl": "Gedroogde producten", "fr": "Produits d\u00e9shydrat\u00e9s", "fi": "Kuivatut tuotteet", "de": "Getrocknete Produkte"}}, "en:dairy-desserts": {"children": ["en:creamy-puddings", "en:crema-catalana", "en:creme-brulee", "en:cuajada", "en:custard-puddings", "en:dairy-chocolate-desserts", "en:dairy-mousses", "en:entremets-mousses-and-creamy-puddings", "en:fermented-dairy-desserts", "en:flans", "en:fresh-cream-cheese-cake", "en:junkets", "en:panna-cottas", "en:semolina-puddings", "en:yogurt-cake", "fr:desserts-lactes-a-la-vanille", "fr:desserts-lactes-au-cafe", "fr:desserts-lactes-aux-oeufs", "fr:gateaux-de-riz", "fr:gateaux-de-semoule", "fr:oeufs-au-lait"], "synonyms": {"it": ["Dolci a base di latte", "dessert a base di latte"], "hr": ["Mlije\u010dne slastice"], "es": ["Postres l\u00e1cteos"], "bg": ["\u041c\u043b\u0435\u0447\u043d\u0438 \u0434\u0435\u0441\u0435\u0440\u0442\u0438"], "ca": ["Postres l\u00e0ctics"], "zh": ["\u5976\u7c7b\u751c\u54c1"], "en": ["Dairy desserts"], "pt": ["Sobremesas de latic\u00ednios"], "lt": ["Pieniniai desertai", "Desertai i\u0161 pieno"], "pl": ["Desery mleczne"], "ru": ["\u041c\u043e\u043b\u043e\u0447\u043d\u044b\u0435 
\u0434\u0435\u0441\u0435\u0440\u0442\u044b", "\u0414\u0435\u0441\u0435\u0440\u0442\u044b \u043c\u043e\u043b\u043e\u0447\u043d\u044b\u0435"], "nl": ["Desserts op basis van melk", "Nagerechten op basis van melk", "Toetjes op basis van melk"], "he": ["\u05e7\u05d9\u05e0\u05d5\u05d7\u05d9\u05dd \u05d7\u05dc\u05d1\u05d9\u05d9\u05dd"], "hu": ["Tejipari desszertek", "tejes desszertek"], "de": ["Milchnachspeisen", "Nachspeisen auf Milchbasis"], "fr": ["Desserts lact\u00e9s", "Desserts \u00e0 base de lait", "Desserts au lait"], "fi": ["Maitoj\u00e4lkiruoat"]}, "intake24_category_code": {"fr": "YGRT", "en": "YGRT"}, "pnns_group_2": {"en": "Dairy desserts"}, "parents": ["en:dairies", "en:desserts"], "food_groups": {"en": "en:dairy-desserts"}, "name": {"ca": "Postres l\u00e0ctics", "zh": "\u5976\u7c7b\u751c\u54c1", "en": "Dairy desserts", "hr": "Mlije\u010dne slastice", "it": "Dolci a base di latte", "es": "Postres l\u00e1cteos", "bg": "\u041c\u043b\u0435\u0447\u043d\u0438 \u0434\u0435\u0441\u0435\u0440\u0442\u0438", "ru": "\u041c\u043e\u043b\u043e\u0447\u043d\u044b\u0435 \u0434\u0435\u0441\u0435\u0440\u0442\u044b", "lt": "Pieniniai desertai", "pt": "Sobremesas de latic\u00ednios", "pl": "Desery mleczne", "he": "\u05e7\u05d9\u05e0\u05d5\u05d7\u05d9\u05dd \u05d7\u05dc\u05d1\u05d9\u05d9\u05dd", "hu": "Tejipari desszertek", "nl": "Desserts op basis van melk", "fr": "Desserts lact\u00e9s", "fi": "Maitoj\u00e4lkiruoat", "de": "Milchnachspeisen"}}, "en:carbonated-drinks": {"name": {"ca": "Begudes carbonatades", "sv": "Kolsyrade Drycker", "zh": "\u78b3\u9178\u996e\u6599", "en": "Carbonated drinks", "hr": "Gazirana pi\u0107a", "it": "Bevande effervescente", "ja": "\u70ad\u9178\u98f2\u6599", "es": "Bebidas carbonatadas", "bg": "\u0413\u0430\u0437\u0438\u0440\u0430\u043d\u0438 \u043d\u0430\u043f\u0438\u0442\u043a\u0438", "ru": "\u0413\u0430\u0437\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u043d\u0430\u043f\u0438\u0442\u043a\u0438", "da": "Kulsyreholdige drikkevarer", "pt": 
"Bebidas carbonatadas", "lt": "Gazuoti g\u0117rimai", "pl": "Napoje gazowane", "hu": "Sz\u00e9nsavas italok", "nl": "Koolzuurhoudende dranken", "tr": "Gazl\u0131 i\u00e7ecekler", "sl": "Gazirane pija\u010de", "fr": "Boissons gazeuses", "fi": "Hiilihapotetut juomat", "cs": "Sycen\u00e9 n\u00e1poje", "de": "Kohlens\u00e4urehaltige Getr\u00e4nke"}, "parents": ["en:beverages"], "children": ["en:carbonated-fruit-soft-drink", "en:carbonated-soft-drinks-without-fruit-juice", "en:carbonated-waters", "en:sodas", "en:sparkling-apple-juices", "en:sparkling-apple-nectars"], "synonyms": {"hu": ["Sz\u00e9nsavas italok"], "tr": ["Gazl\u0131 i\u00e7ecekler"], "nl": ["Koolzuurhoudende dranken"], "fi": ["hiilihapotetut juomat"], "fr": ["Boissons gazeuses", "boissons \u00e0 bulles", "boissons effervescentes", "boissons p\u00e9tillantes", "boisson gazeuse", "boisson \u00e0 bulles", "boisson effervescente", "boisson p\u00e9tillante"], "sl": ["gazirane pija\u010de", "gazirana pija\u010da"], "de": ["Kohlens\u00e4urehaltige Getr\u00e4nke"], "cs": ["Sycen\u00e9 n\u00e1poje"], "en": ["Carbonated drinks"], "zh": ["\u78b3\u9178\u996e\u6599"], "sv": ["Kolsyrade Drycker"], "ca": ["Begudes carbonatades"], "es": ["Bebidas carbonatadas"], "ja": ["\u70ad\u9178\u98f2\u6599"], "bg": ["\u0413\u0430\u0437\u0438\u0440\u0430\u043d\u0438 \u043d\u0430\u043f\u0438\u0442\u043a\u0438"], "it": ["Bevande effervescente", "bevande gassate", "bevande frizzanti"], "hr": ["gazirana pi\u0107a", "gazirano pi\u0107e"], "ru": ["\u0413\u0430\u0437\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u043d\u0430\u043f\u0438\u0442\u043a\u0438"], "pl": ["Napoje gazowane"], "pt": ["Bebidas carbonatadas"], "lt": ["Gazuoti g\u0117rimai", "gazuotas g\u0117rimas"], "da": ["Kulsyreholdige drikkevarer"]}}, "en:plant-based-foods-and-beverages": {"name": {"da": "Plantebaserede f\u00f8devarer og drikkevarer", "pt": "Alimentos e bebidas \u00e0 base de plantas", "lt": "Augalin\u0117s kilm\u0117s maisto produktai ir g\u0117rimai", 
"pl": "\u017bywno\u015b\u0107 i napoje na bazie ro\u015blin", "no": "Plantebaserte matvarer og drikkevarer", "ru": "\u0415\u0434\u0430 \u0438 \u043d\u0430\u043f\u0438\u0442\u043a\u0438 \u0438\u0437 \u0440\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0433\u043e \u0441\u044b\u0440\u044c\u044f", "hr": "Biljne hrane i pi\u0107a", "it": "Cibi e bevande a base vegetale", "bg": "\u0420\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u043d\u0438 \u0445\u0440\u0430\u043d\u0438 \u0438 \u043d\u0430\u043f\u0438\u0442\u043a\u0438", "es": "Alimentos y bebidas de origen vegetal", "sv": "V\u00e4xtbaserad mat och dryck", "ca": "Aliments i begudes amb base vegetal", "en": "Plant-based foods and beverages", "zh": "\u690d\u7269\u6027\u98df\u7269\u4e0e\u996e\u54c1", "de": "Pflanzliche Lebensmittel und Getr\u00e4nke", "fr": "Aliments et boissons \u00e0 base de v\u00e9g\u00e9taux", "fi": "Kasvipohjaiset ruoat ja juomat", "ro": "Alimente \u0219i b\u0103uturi pe baz\u0103 de plante", "tr": "Bitkisel yiyecek ve i\u00e7ecekler", "nl": "Plantaardige levensmiddelen en dranken", "he": "\u05de\u05d6\u05d5\u05e0\u05d5\u05ea \u05d5\u05de\u05e9\u05e7\u05d0\u05d5\u05ea \u05e2\u05dc \u05d1\u05e1\u05d9\u05e1 \u05e6\u05de\u05d7\u05d9", "hu": "N\u00f6v\u00e9nyi alap\u00fa \u00e9lelmiszerek \u00e9s italok"}, "synonyms": {"zh": ["\u690d\u7269\u6027\u98df\u7269\u4e0e\u996e\u54c1"], "en": ["Plant-based foods and beverages"], "ca": ["Aliments i begudes amb base vegetal"], "sv": ["V\u00e4xtbaserad mat och dryck"], "bg": ["\u0420\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u043d\u0438 \u0445\u0440\u0430\u043d\u0438 \u0438 \u043d\u0430\u043f\u0438\u0442\u043a\u0438"], "es": ["Alimentos y bebidas de origen vegetal"], "it": ["Cibi e bevande a base vegetale"], "hr": ["Biljne hrane i pi\u0107a"], "ru": ["\u0415\u0434\u0430 \u0438 \u043d\u0430\u043f\u0438\u0442\u043a\u0438 \u0438\u0437 \u0440\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0433\u043e \u0441\u044b\u0440\u044c\u044f"], "no": 
["Plantebaserte matvarer og drikkevarer", "Plantebaserte mat- og drikkevarer", "Plantebasert mat og drikke"], "pl": ["\u017bywno\u015b\u0107 i napoje na bazie ro\u015blin"], "pt": ["Alimentos e bebidas \u00e0 base de plantas"], "da": ["Plantebaserede f\u00f8devarer og drikkevarer"], "lt": ["Augalin\u0117s kilm\u0117s maisto produktai ir g\u0117rimai"], "hu": ["N\u00f6v\u00e9nyi alap\u00fa \u00e9lelmiszerek \u00e9s italok"], "he": ["\u05de\u05d6\u05d5\u05e0\u05d5\u05ea \u05d5\u05de\u05e9\u05e7\u05d0\u05d5\u05ea \u05e2\u05dc \u05d1\u05e1\u05d9\u05e1 \u05e6\u05de\u05d7\u05d9"], "tr": ["Bitkisel yiyecek ve i\u00e7ecekler", "Bitki kaynakl\u0131 yiyecek ve i\u00e7ecekler", "Bitki yiyecek ve i\u00e7ecekleri"], "nl": ["Plantaardige levensmiddelen en dranken"], "ro": ["Alimente \u0219i b\u0103uturi pe baz\u0103 de plante"], "fi": ["Kasvipohjaiset ruoat ja juomat"], "fr": ["Aliments et boissons \u00e0 base de v\u00e9g\u00e9taux"], "de": ["Pflanzliche Lebensmittel und Getr\u00e4nke"]}, "children": ["en:dairy-substitutes", "en:plant-based-beverages", "en:plant-based-foods", "en:vegetable-based-foods-and-beverages"]}, "en:digestives-covered-with-chocolate": {"name": {"en": "Digestives covered with chocolate", "fr": "Sabl\u00e9s napp\u00e9s de chocolat"}, "parents": ["en:chocolate-biscuits"], "synonyms": {"en": ["Digestives covered with chocolate", "Chocolate-covered biscuits"], "fr": ["Sabl\u00e9s napp\u00e9s de chocolat", "Biscuits napp\u00e9s de chocolat"]}}, "en:desserts": {"children": ["en:baklava", "en:black-forest-gateau", "en:charlottes", "en:chocolate-desserts", "en:chocolate-soft-cake", "en:clafoutis", "en:coffee-desserts", "en:compotes", "en:crumbles", "en:dairy-desserts", "en:floating-island", "en:french-toast", "en:frozen-desserts", "en:fruit-liegeois", "en:fruits-compote-with-reduced-sugar", "en:fruits-desserts", "en:fruits-in-syrup", "en:fruits-puree-without-sugar-added", "en:gazelle-horn", "en:goblet-of-belle-helene-pear-ice-cream", 
"en:goblet-of-chocolate-ice-cream-topped-with-whipped-cream", "en:goblet-of-coffee-ice-cream-topped-with-whipped-cream", "en:goblet-of-peach-melba-ice-cream", "en:jelly-desserts", "en:kadaif", "en:liegeois", "en:macarons", "en:meringue-roulades", "en:mochi", "en:non-dairy-desserts", "en:panforte", "en:peach-melba-with-vanilla-ice-cream-and-raspberry-sauce", "en:puddings", "en:refrigerated-desserts", "en:shelf-stable-desserts", "en:soft-cake-filled-with-chocolate", "en:soft-cake-filled-with-fruit-paste-and-coated-with-sugar-icing", "en:sponge-cake-with-fruit-mousse", "en:sponge-cakes", "en:sponge-puddings", "en:sushki", "en:sweet-mousses", "en:tartufo", "en:trifles", "es:roscon", "fi:m\u00e4mmi", "fr:buches-patissieres", "fr:pastel-de-nata"], "synonyms": {"he": ["\u05e7\u05d9\u05e0\u05d5\u05d7\u05d9\u05dd"], "hu": ["Desszertek"], "tr": ["Tatl\u0131lar"], "nl": ["Desserts", "Nagerechten"], "fr": ["Desserts"], "fi": ["J\u00e4lkiruoat"], "de": ["Desserts", "Nachtische", "Nachspeisen"], "sv": ["Efterr\u00e4tter", "Desserter"], "ca": ["Postres"], "en": ["Desserts"], "zh": ["\u751c\u54c1", "\u751c\u70b9"], "hr": ["deserti", "desert"], "it": ["Dessert"], "es": ["Postres"], "ja": ["\u30c7\u30b6\u30fc\u30c8"], "bg": ["\u0414\u0435\u0441\u0435\u0440\u0442\u0438"], "ru": ["\u0414\u0435\u0441\u0435\u0440\u0442\u044b", "\u0414\u0435\u0441\u0435\u0440\u0442"], "da": ["Desserter"], "pt": ["Sobremesas"], "lt": ["Desertai"], "pl": ["Desery"]}, "name": {"de": "Desserts", "fi": "J\u00e4lkiruoat", "fr": "Desserts", "tr": "Tatl\u0131lar", "nl": "Desserts", "hu": "Desszertek", "he": "\u05e7\u05d9\u05e0\u05d5\u05d7\u05d9\u05dd", "pl": "Desery", "da": "Desserter", "pt": "Sobremesas", "lt": "Desertai", "ru": "\u0414\u0435\u0441\u0435\u0440\u0442\u044b", "es": "Postres", "ja": "\u30c7\u30b6\u30fc\u30c8", "bg": "\u0414\u0435\u0441\u0435\u0440\u0442\u0438", "it": "Dessert", "hr": "Deserti", "zh": "\u751c\u54c1", "en": "Desserts", "sv": "Efterr\u00e4tter", "ca": "Postres"}, "wikidata": {"en": 
"Q182940"}}, "en:sugars": {"nova": {"en": "2"}, "pnns_group_2": {"en": "Sweets"}, "agribalyse_proxy_food_code": {"en": "31016"}, "parents": ["en:sweeteners"], "ciqual_proxy_food_name": {"en": "Sugar, white", "fr": "Sucre blanc"}, "intake24_category_code": {"en": "SUGA"}, "name": {"pl": "Cukier", "da": "Sukker", "pt": "A\u00e7ucar", "ru": "\u0421\u0430\u0445\u0430\u0440", "ja": "\u7802\u7cd6", "es": "Az\u00facar", "bg": "\u0417\u0430\u0445\u0430\u0440", "hr": "\u0160e\u010deri", "it": "Zucchero", "zh": "\u7cd6\u7c7b", "en": "Sugars", "sv": "Socker", "ca": "Sucre", "de": "Zucker", "cs": "Cukr", "fi": "Sokerit", "fr": "Sucres", "tr": "\u015eeker", "nl": "Suiker", "hu": "Cukrok"}, "ciqual_proxy_food_code": {"en": "31016"}, "food_groups": {"en": "en:sweets"}, "wikidata": {"en": "Q11002"}, "children": ["en:beet-sugars", "en:brown-sugars", "en:cane-sugar", "en:caster-sugars", "en:coconut-sugar", "en:cotton-candy-sugars", "en:dextrose", "en:granulated-sugars", "en:inverted-sugar-syrups", "en:lump-sugar", "en:maple-sugars", "en:palm-sugars", "en:powdered-sugars", "en:rock-candies", "en:vanilla-sugars", "en:vanillin-sugars", "fr:sucres-complets", "fr:sucres-gelifiant", "fr:sucres-mi-blancs", "fr:sucres-perles", "fr:vergeoises"], "synonyms": {"tr": ["\u015eeker"], "nl": ["Suiker"], "hu": ["Cukrok"], "de": ["Zucker"], "cs": ["Cukr"], "fi": ["sokerit"], "fr": ["Sucres", "sucre"], "bg": ["\u0417\u0430\u0445\u0430\u0440"], "ja": ["\u7802\u7cd6"], "es": ["Az\u00facar"], "hr": ["\u0161e\u010deri", "\u0161e\u010der"], "it": ["Zucchero"], "zh": ["\u7cd6\u7c7b"], "en": ["Sugars"], "sv": ["Socker"], "ca": ["Sucre"], "pl": ["Cukier"], "pt": ["A\u00e7ucar"], "da": ["Sukker"], "ru": ["\u0421\u0430\u0445\u0430\u0440", "\u0441\u0430\u0445\u0430\u0440\u0430"]}}, "en:chocolate-biscuits": {"ciqual_proxy_food_code": {"en": "24036"}, "name": {"lt": "\u0160okoladiniai sausainiai", "pt": "Biscoitos de chocolate", "da": "Chokoladekiks", "ru": 
"\u0428\u043e\u043a\u043e\u043b\u0430\u0434\u043d\u043e\u0435 \u043f\u0435\u0447\u0435\u043d\u044c\u0435", "it": "Biscotti al cioccolato", "hr": "\u010cokoladni keksi", "es": "Galletas de chocolate", "bg": "\u0428\u043e\u043a\u043e\u043b\u0430\u0434\u043e\u0432\u0438 \u0431\u0438\u0441\u043a\u0432\u0438\u0442\u0438", "ca": "Galetes de xocolata", "zh": "\u5de7\u514b\u529b\u997c\u5e72", "en": "Chocolate biscuits", "de": "Schokoladenkekse", "fr": "Biscuits au chocolat", "fi": "Suklaakeksit", "ro": "Biscui\u021bi cu ciocolat\u0103", "nl": "Chocolade koekjes", "tr": "\u00c7ikolatal\u0131 bisk\u00fcvi", "he": "\u05d1\u05d9\u05e1\u05e7\u05d5\u05d5\u05d9\u05d8 \u05e9\u05d5\u05e7\u05d5\u05dc\u05d3", "hu": "Csokol\u00e1d\u00e9s kekszek"}, "parents": ["en:biscuits"], "agribalyse_proxy_food_code": {"en": "24036"}, "ciqual_proxy_food_name": {"en": "Biscuit -cookie-, with chocolate, prepacked"}, "children": ["en:biscuit-with-a-chocolate-bar-covering", "en:chocolate-chip-cookies", "en:chocolate-sprits-biscuits", "en:chocolate-stuffed-wafers", "en:crepes-dentelle-with-chocolate", "en:crispy-biscuit-with-reduced-fat-chocolate", "en:dark-chocolate-biscuits", "en:digestives-covered-with-chocolate", "en:florentines", "en:milk-chocolate-biscuits", "en:stick-biscuits-covered-with-chocolate", "en:white-chocolate-biscuits", "fr:barquettes-au-chocolat"], "synonyms": {"hr": ["\u010cokoladni keksi"], "it": ["Biscotti al cioccolato"], "bg": ["\u0428\u043e\u043a\u043e\u043b\u0430\u0434\u043e\u0432\u0438 \u0431\u0438\u0441\u043a\u0432\u0438\u0442\u0438"], "es": ["Galletas de chocolate"], "ca": ["Galetes de xocolata"], "en": ["Chocolate biscuits", "Biscuit with chocolate"], "zh": ["\u5de7\u514b\u529b\u997c\u5e72"], "pt": ["Biscoitos de chocolate"], "lt": ["\u0160okoladiniai sausainiai", "Sausainiai su \u0161okoladu"], "da": ["Chokoladekiks"], "ru": ["\u0428\u043e\u043a\u043e\u043b\u0430\u0434\u043d\u043e\u0435 \u043f\u0435\u0447\u0435\u043d\u044c\u0435"], "nl": ["Chocolade koekjes", "Chocolade 
koekje", "Chocoladebiscuits", "Chocoladebiscuit"], "tr": ["\u00c7ikolatal\u0131 bisk\u00fcvi"], "he": ["\u05d1\u05d9\u05e1\u05e7\u05d5\u05d5\u05d9\u05d8 \u05e9\u05d5\u05e7\u05d5\u05dc\u05d3", "\u05d5\u05e4\u05dc \u05e9\u05d5\u05e7\u05d5\u05dc\u05d3", "\u05d5\u05d0\u05e4\u05dc \u05e9\u05d5\u05e7\u05d5\u05dc\u05d3", "\u05d1\u05e4\u05dc\u05d4 \u05e9\u05d5\u05e7\u05d5\u05dc\u05d4", "\u05d1\u05d0\u05e4\u05dc\u05d4 \u05e9\u05d5\u05e7\u05d5\u05dc\u05d3"], "hu": ["Csokol\u00e1d\u00e9s kekszek"], "de": ["Schokoladenkekse"], "fr": ["Biscuits au chocolat", "biscuit au chocolat", "biscuit chocolat\u00e9", "biscuits chocolat\u00e9s", "biscuits go\u00fbt chocolat", "biscuits saveur chocolat", "Biscuit sec au chocolat"], "fi": ["suklaakeksit"], "ro": ["Biscui\u021bi cu ciocolat\u0103"]}}, "en:sweetened-beverages": {"pnns_group_2": {"en": "Sweetened beverages"}, "name": {"cs": "Slazen\u00e9 n\u00e1poje", "de": "Gezuckerte Getr\u00e4nke", "fr": "Boissons avec sucre ajout\u00e9", "fi": "Makeutetut juomat", "ro": "B\u0103uturi dulci", "nl": "Gesuikerde dranken", "tr": "Tatl\u0131 i\u00e7ecekler", "he": "\u05de\u05e9\u05e7\u05d0\u05d5\u05ea \u05de\u05ea\u05d5\u05e7\u05d9\u05dd", "hu": "Cukros italok", "pt": "Bebidas ado\u00e7adas", "lt": "Saldinti g\u0117rimai", "da": "S\u00f8dede drikkevarer", "pl": "Napoje s\u0142odzone", "ru": "\u041d\u0430\u043f\u0438\u0442\u043a\u0438 \u0441 \u0434\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u0438\u0435\u043c \u0441\u0430\u0445\u0430\u0440\u0430", "it": "Bevande zuccherate", "hr": "Zasla\u0111ena pi\u0107a", "es": "Bebidas azucaradas", "bg": "\u041f\u043e\u0434\u0441\u043b\u0430\u0434\u0435\u043d\u0438 \u043d\u0430\u043f\u0438\u0442\u043a\u0438", "th": "\u0e19\u0e49\u0e33\u0e2b\u0e27\u0e32\u0e19", "ca": "Begudes ensucrades", "en": "Sweetened beverages"}, "parents": ["en:beverages"], "food_groups": {"en": "en:sweetened-beverages"}, "synonyms": {"it": ["Bevande zuccherate"], "hr": ["Zasla\u0111ena pi\u0107a"], "th": 
["\u0e19\u0e49\u0e33\u0e2b\u0e27\u0e32\u0e19"], "bg": ["\u041f\u043e\u0434\u0441\u043b\u0430\u0434\u0435\u043d\u0438 \u043d\u0430\u043f\u0438\u0442\u043a\u0438"], "es": ["Bebidas azucaradas"], "ca": ["Begudes ensucrades"], "en": ["Sweetened beverages", "Sugared beverages", "Beverages with added sugar"], "pt": ["Bebidas ado\u00e7adas", "Bebidas a\u00e7ucaradas", "Bebidas com adi\u00e7\u00e3o de a\u00e7\u00facar"], "lt": ["Saldinti g\u0117rimai", "G\u0117rimai su prid\u0117tiniu cukrumi"], "da": ["S\u00f8dede drikkevarer", "Sukkerholdige drikkevarer", "Drikkevarer med tilsat sukker"], "pl": ["Napoje s\u0142odzone", "Napoje z dodatkiem cukru"], "ru": ["\u041d\u0430\u043f\u0438\u0442\u043a\u0438 \u0441 \u0434\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u0438\u0435\u043c \u0441\u0430\u0445\u0430\u0440\u0430"], "tr": ["Tatl\u0131 i\u00e7ecekler", "\u015eekerli i\u00e7ecekler"], "nl": ["Gesuikerde dranken"], "he": ["\u05de\u05e9\u05e7\u05d0\u05d5\u05ea \u05de\u05ea\u05d5\u05e7\u05d9\u05dd", "\u05de\u05e9\u05e7\u05d0\u05d5\u05ea \u05de\u05de\u05d5\u05ea\u05e7\u05d9\u05dd"], "hu": ["Cukros italok"], "cs": ["Slazen\u00e9 n\u00e1poje"], "de": ["Gezuckerte Getr\u00e4nke", "Zuckerhaltige Getr\u00e4nke"], "fr": ["Boissons avec sucre ajout\u00e9", "Boissons sucr\u00e9es"], "fi": ["Makeutetut juomat"], "ro": ["B\u0103uturi dulci", "B\u0103uturi cu zah\u0103r", "Bauturi dulci", "Bauturi cu zahar"]}}, "en:creamy-puddings": {"ciqual_food_name": {"en": "Custard dessert, refrigerated or canned -average-", "fr": "Cr\u00e8me dessert, rayon frais ou appertis\u00e9e -aliment moyen-"}, "intake24_category_code": {"en": "CSTD"}, "children": ["en:canned-creamy-puddings", "en:caramel-creamy-puddings", "en:chocolate-creamy-puddings", "en:coffee-creamy-puddings", "en:double-flavour-creamy-puddings", "en:refrigerated-creamy-puddings", "en:vanilla-creamy-puddings", "es:natillas-con-galleta"], "synonyms": {"de": ["Cremedesserts"], "fi": ["j\u00e4lkiruokakermat"], "fr": ["Cr\u00e8mes dessert", 
"Cr\u00e8mes desserts", "Cr\u00e8mes dessert lact\u00e9es", "cr\u00e8mes dessert \u00e0 base de lait", "cr\u00e8mes dessert au lait"], "nl": ["Vla's"], "lt": ["Desertiniai kremai", "kreminiai desertai"], "pt": ["Cremes de sobremesa"], "es": ["Postres de cremas"], "it": ["Creme dolci"], "hr": ["Kremasti puding"], "en": ["Creamy puddings", "Dessert creams", "Custard desserts"], "ca": ["Postres cremoses"]}, "name": {"en": "Creamy puddings", "ca": "Postres cremoses", "es": "Postres de cremas", "hr": "Kremasti puding", "it": "Creme dolci", "lt": "Desertiniai kremai", "pt": "Cremes de sobremesa", "nl": "Vla's", "fi": "J\u00e4lkiruokakermat", "fr": "Cr\u00e8mes dessert", "de": "Cremedesserts"}, "parents": ["en:dairy-desserts", "en:entremets-mousses-and-creamy-puddings"], "ciqual_food_code": {"en": "39511"}}, "en:lemonade": {"agribalyse_proxy_food_code": {"en": "18010"}, "agribalyse_proxy_food_name": {"fr": "Limonade, sucr\u00e9e", "en": "Lemonade, with sugar"}, "parents": ["en:sodas"], "wikidata": {"en": "Q893"}, "wikipedia": {"en": "https://en.wikipedia.org/wiki/Lemonade"}, "synonyms": {"fa": ["\u0634\u0631\u0628\u062a \u0622\u0628\u0644\u06cc\u0645\u0648"], "ar": ["\u0634\u0631\u0627\u0628 \u0627\u0644\u0644\u064a\u0645\u0648\u0646"], "pa": ["\u0a38\u0a3c\u0a3f\u0a15\u0a70\u0a1c\u0a35\u0a40"], "la": ["Limonata"], "he": ["\u05dc\u05d9\u05de\u05d5\u05e0\u05d3\u05d4"], "ko": ["\ub808\ubaa8\ub124\uc774\ub4dc"], "bs": ["Limunada"], "hy": ["\u053c\u056b\u0574\u0578\u0576\u0561\u0564"], "ru": ["\u043b\u0438\u043c\u043e\u043d\u0430\u0434"], "it": ["limonata", "Bibita gassata"], "ml": ["\u0d28\u0d3e\u0d30\u0d19\u0d4d\u0d19\u0d3e\u0d35\u0d46\u0d33\u0d4d\u0d33\u0d02"], "ca": ["llimonada"], "mk": ["\u041b\u0438\u043c\u043e\u043d\u0430\u0434\u0430"], "id": ["Limun"], "mr": ["\u0932\u093f\u0902\u092c\u0942 \u0938\u0930\u092c\u0924"], "lb": ["Limonad"], "kk": ["\u041b\u0438\u043c\u043e\u043d\u0430\u0434"], "uz": ["Limonad"], "ta": 
["\u0b8e\u0bb2\u0bc1\u0bae\u0bbf\u0b9a\u0bcd\u0b9a\u0bc8\u0b9a\u0bcd\u0b9a\u0bbe\u0bb1\u0bc1"], "pt": ["limonada"], "da": ["Limonade"], "lt": ["Limonadas", "Limonadai"], "eu": ["Limonada"], "hr": ["limunade", "limunada"], "gl": ["Limoada"], "bg": ["\u041b\u0438\u043c\u043e\u043d\u0430\u0434\u0430"], "be": ["\u043b\u0456\u043c\u0430\u043d\u0430\u0434"], "cv": ["\u041b\u0438\u043c\u043e\u043d\u0430\u0434"], "et": ["Limonaad"], "nb": ["limonade"], "sr": ["\u043b\u0438\u043c\u0443\u043d\u0430\u0434\u0430"], "zh": ["\u6ab8\u6aac\u6c34"], "io": ["Limonado"], "ro": ["Limonad\u0103", "Limonada"], "uk": ["\u043b\u0438\u043c\u043e\u043d\u0430\u0434"], "ne": ["\u0932\u0947\u092e\u094b\u0928\u0947\u0921"], "nl": ["Limonade", "Limonades"], "tr": ["Limonata"], "sh": ["Limunada"], "pl": ["Lemoniada", "Lemoniady"], "cy": ["Lemon\u00ead"], "az": ["Limonad"], "oc": ["limonada"], "el": ["\u039b\u03b5\u03bc\u03bf\u03bd\u03ac\u03b4\u03b1"], "sd": ["\u0633\u06aa\u0646\u062c\u0628\u064a\u0646"], "en": ["Lemonade", "Lemonades"], "cs": ["limon\u00e1da"], "de": ["Limonaden", "limonade"], "fr": ["Limonades", "limonade"], "fi": ["Sitruunasooda"], "eo": ["Limonado"], "ga": ["L\u00edoman\u00e1id Dhearg"], "ba": ["\u041b\u0438\u043c\u043e\u043d\u0430\u0434"], "hu": ["limon\u00e1d\u00e9"], "ja": ["\u30ec\u30e2\u30cd\u30fc\u30c9"], "es": ["Limonadas", "limonada"], "ur": ["\u0633\u06a9\u0646\u062c\u0628\u06cc\u0646"]}, "children": ["en:lemonade-with-flavoured-syrup", "en:lemonade-with-sugar", "en:lemonade-with-sugar-and-artificial-sweeteners", "en:lemonade-without-sugar-with-artificial-sweeteners"], "name": {"sr": "\u041b\u0438\u043c\u0443\u043d\u0430\u0434\u0430", "nb": "Limonade", "zh": "\u6ab8\u6aac\u6c34", "gl": "Limoada", "hr": "Limunade", "cv": "\u041b\u0438\u043c\u043e\u043d\u0430\u0434", "et": "Limonaad", "be": "\u041b\u0456\u043c\u0430\u043d\u0430\u0434", "bg": "\u041b\u0438\u043c\u043e\u043d\u0430\u0434\u0430", "eu": "Limonada", "lt": "Limonadas", "pt": "Limonada", "da": "Limonade", "ta": 
"\u0b8e\u0bb2\u0bc1\u0bae\u0bbf\u0b9a\u0bcd\u0b9a\u0bc8\u0b9a\u0bcd\u0b9a\u0bbe\u0bb1\u0bc1", "uz": "Limonad", "kk": "\u041b\u0438\u043c\u043e\u043d\u0430\u0434", "lb": "Limonad", "mr": "\u0932\u093f\u0902\u092c\u0942 \u0938\u0930\u092c\u0924", "id": "Limun", "ca": "Llimonada", "mk": "\u041b\u0438\u043c\u043e\u043d\u0430\u0434\u0430", "it": "Limonata", "ml": "\u0d28\u0d3e\u0d30\u0d19\u0d4d\u0d19\u0d3e\u0d35\u0d46\u0d33\u0d4d\u0d33\u0d02", "hy": "\u053c\u056b\u0574\u0578\u0576\u0561\u0564", "bs": "Limunada", "ru": "\u041b\u0438\u043c\u043e\u043d\u0430\u0434", "ko": "\ub808\ubaa8\ub124\uc774\ub4dc", "he": "\u05dc\u05d9\u05de\u05d5\u05e0\u05d3\u05d4", "pa": "\u0a38\u0a3c\u0a3f\u0a15\u0a70\u0a1c\u0a35\u0a40", "la": "Limonata", "ar": "\u0634\u0631\u0627\u0628 \u0627\u0644\u0644\u064a\u0645\u0648\u0646", "fa": "\u0634\u0631\u0628\u062a \u0622\u0628\u0644\u06cc\u0645\u0648", "ur": "\u0633\u06a9\u0646\u062c\u0628\u06cc\u0646", "es": "Limonadas", "ja": "\u30ec\u30e2\u30cd\u30fc\u30c9", "hu": "Limon\u00e1d\u00e9", "ba": "\u041b\u0438\u043c\u043e\u043d\u0430\u0434", "fr": "Limonades", "ga": "L\u00edoman\u00e1id Dhearg", "eo": "Limonado", "fi": "Sitruunasooda", "cs": "Limon\u00e1da", "de": "Limonaden", "sd": "\u0633\u06aa\u0646\u062c\u0628\u064a\u0646", "en": "Lemonade", "el": "\u039b\u03b5\u03bc\u03bf\u03bd\u03ac\u03b4\u03b1", "az": "Limonad", "oc": "Limonada", "cy": "Lemon\u00ead", "pl": "Lemoniada", "sh": "Limunada", "tr": "Limonata", "nl": "Limonade", "io": "Limonado", "ne": "\u0932\u0947\u092e\u094b\u0928\u0947\u0921", "uk": "\u041b\u0438\u043c\u043e\u043d\u0430\u0434", "ro": "Limonad\u0103"}, "description": {"en": "Lemonade can be any one of a variety of sweetened beverages found throughout the world, but which are traditionally all characterized by a lemon flavor."}}, "en:vegetables-based-foods": {"wikidata": {"en": "Q11004"}, "pnns_group_2": {"en": "Vegetables"}, "parents": ["en:fruits-and-vegetables-based-foods"], "children": ["en:canned-vegetables", 
"en:dried-vegetables", "en:fermented-vegetables", "en:freeze-dried-vegetables", "en:fresh-vegetables", "en:fresh-vegetables-sprouts", "en:frozen-vegetables", "en:garlic-and-their-products", "en:ground-dried-vegetables", "en:mushrooms-and-their-products", "en:onions-and-their-products", "en:pumpkins-and-their-products", "en:tomatoes-and-their-products", "en:vegetable-flans", "en:vegetables", "en:vegetables-freshly-prepared", "en:wild-garlic"], "synonyms": {"de": ["Gem\u00fcsebasierte Lebensmittel"], "fi": ["Kasviper\u00e4iset ruoat"], "fr": ["L\u00e9gumes et d\u00e9riv\u00e9s"], "tr": ["Sebze yiyecekler", "Sebze g\u0131dalar\u0131", "Sebze besinleri", "Sebze kaynakl\u0131 yiyecekler", "Sebze kaynakl\u0131 g\u0131dalar", "Sebze kaynakl\u0131 besinler"], "nl": ["Groente en afgeleide producten"], "hu": ["Z\u00f6lds\u00e9g alap\u00fa \u00e9lelmiszerek"], "he": ["\u05de\u05d6\u05d5\u05df \u05de\u05d1\u05d5\u05e1\u05e1 \u05d9\u05e8\u05e7\u05d5\u05ea"], "pl": ["Produkty na bazie warzyw"], "da": ["Gr\u00f8ntsagsbaserede f\u00f8devarer"], "pt": ["Alimentos \u00e0 base de vegetais"], "lt": ["Dar\u017eov\u0117s ir j\u0173 kilm\u0117s maisto produktai"], "ru": ["\u0431\u043b\u044e\u0434\u0430 \u0438\u0437 \u043e\u0432\u043e\u0449\u0435\u0439"], "es": ["Verduras y hortalizas y sus productos", "Verduras y hortalizas y derivados"], "ja": ["\u91ce\u83dc"], "bg": ["\u0417\u0435\u043b\u0435\u043d\u0447\u0443\u043a\u043e\u0432\u0438 \u0445\u0440\u0430\u043d\u0438"], "th": ["\u0e1c\u0e25\u0e34\u0e15\u0e20\u0e31\u0e13\u0e11\u0e4c\u0e08\u0e32\u0e01\u0e1e\u0e37\u0e0a"], "hr": ["Hrana na bazi povr\u0107a"], "it": ["Cibi a base di verdure"], "en": ["Vegetables based foods"], "ca": ["Aliments amb base de verdures"]}, "name": {"it": "Cibi a base di verdure", "hr": "Hrana na bazi povr\u0107a", "th": "\u0e1c\u0e25\u0e34\u0e15\u0e20\u0e31\u0e13\u0e11\u0e4c\u0e08\u0e32\u0e01\u0e1e\u0e37\u0e0a", "ja": "\u91ce\u83dc", "bg": "\u0417\u0435\u043b\u0435\u043d\u0447\u0443\u043a\u043e\u0432\u0438 
\u0445\u0440\u0430\u043d\u0438", "es": "Verduras y hortalizas y sus productos", "ca": "Aliments amb base de verdures", "en": "Vegetables based foods", "lt": "Dar\u017eov\u0117s ir j\u0173 kilm\u0117s maisto produktai", "pt": "Alimentos \u00e0 base de vegetais", "da": "Gr\u00f8ntsagsbaserede f\u00f8devarer", "pl": "Produkty na bazie warzyw", "ru": "\u0411\u043b\u044e\u0434\u0430 \u0438\u0437 \u043e\u0432\u043e\u0449\u0435\u0439", "tr": "Sebze yiyecekler", "nl": "Groente en afgeleide producten", "he": "\u05de\u05d6\u05d5\u05df \u05de\u05d1\u05d5\u05e1\u05e1 \u05d9\u05e8\u05e7\u05d5\u05ea", "hu": "Z\u00f6lds\u00e9g alap\u00fa \u00e9lelmiszerek", "de": "Gem\u00fcsebasierte Lebensmittel", "fr": "L\u00e9gumes et d\u00e9riv\u00e9s", "fi": "Kasviper\u00e4iset ruoat"}, "food_groups": {"en": "en:vegetables"}}, "en:sodas": {"children": ["de:cola-mix", "en:club-sodas", "en:colas", "en:diet-sodas", "en:fruit-sodas", "en:lemonade", "en:root-beers", "en:tonic-water"], "ciqual_proxy_food_name": {"en": "Fruit soft drink, carbonated -less than 10% of fruit juice-, without sugar and with artificial sweetener-s-", "fr": "Boisson gazeuse aux fruits -\u00e0 moins de 10% de jus-, non sucr\u00e9e, avec \u00e9dulcorants"}, "synonyms": {"hu": ["Sz\u00f3d\u00e1k", "Sz\u00f3dav\u00edzek", "Sz\u00edkv\u00edzek"], "he": ["\u05de\u05e9\u05e7\u05d0\u05d5\u05ea \u05de\u05d5\u05d2\u05d6\u05d9\u05dd", "\u05de\u05e9\u05e7\u05d0\u05d5\u05ea \u05e7\u05dc\u05d9\u05dd"], "nl": ["Frisdranken"], "fi": ["virvoitusjuomat"], "ro": ["B\u0103uturi carbogazoase", "Bauturi carbogazoase", "bauturi racoritoare carbogazoase", "b\u0103uturi r\u0103coritoare carbogazoase"], "fr": ["Sodas", "soda", "boissons sucr\u00e9es gazeuses", "boissons gazeuses sucr\u00e9es"], "de": ["Erfrischungsgetr\u00e4nke", "Softdrink", "Erfrischungsgetr\u00e4nk"], "en": ["Sodas", "carbonated soft drinks"], "zh": ["\u6c7d\u6c34", "\u8f6f\u6027\u996e\u6599", "\u82cf\u6253\u996e\u6599"], "ca": ["Refrescs", "refrescs carbonatats"], "sv": 
["L\u00e4sk", "L\u00e4skedrycker", "l\u00e4skedryck"], "bg": ["\u0421\u043e\u0434\u0430"], "es": ["Sodas", "Gaseosas"], "th": ["\u0e19\u0e49\u0e33\u0e2d\u0e31\u0e14\u0e25\u0e21"], "hr": ["Zasla\u0111ena gazirana pi\u0107a"], "it": ["Bibite gassate", "soft drink"], "ru": ["\u0413\u0430\u0437\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0431\u0435\u0437\u0430\u043b\u043a\u0430\u0433\u043e\u043b\u044c\u043d\u044b\u0435 \u043d\u0430\u043f\u0438\u0442\u043a\u0438"], "pt": ["Sodas", "bebidas refrescantes", "bebidas refrescantes com gazozo"], "da": ["Sodavand"], "lt": ["Gazuoti gaivieji g\u0117rimai"]}, "intake24_category_code": {"en": "SOFT"}, "nova": {"en": "3"}, "wikidata": {"en": "Q147538"}, "parents": ["en:carbonated-drinks"], "agribalyse_proxy_food_code": {"en": "18340"}, "ciqual_proxy_food_code": {"en": "18340"}, "name": {"fr": "Sodas", "fi": "Virvoitusjuomat", "ro": "B\u0103uturi carbogazoase", "de": "Erfrischungsgetr\u00e4nke", "he": "\u05de\u05e9\u05e7\u05d0\u05d5\u05ea \u05de\u05d5\u05d2\u05d6\u05d9\u05dd", "hu": "Sz\u00f3d\u00e1k", "nl": "Frisdranken", "ru": "\u0413\u0430\u0437\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u0431\u0435\u0437\u0430\u043b\u043a\u0430\u0433\u043e\u043b\u044c\u043d\u044b\u0435 \u043d\u0430\u043f\u0438\u0442\u043a\u0438", "da": "Sodavand", "pt": "Sodas", "lt": "Gazuoti gaivieji g\u0117rimai", "ca": "Refrescs", "sv": "L\u00e4sk", "zh": "\u6c7d\u6c34", "en": "Sodas", "hr": "Zasla\u0111ena gazirana pi\u0107a", "it": "Bibite gassate", "th": "\u0e19\u0e49\u0e33\u0e2d\u0e31\u0e14\u0e25\u0e21", "es": "Sodas", "bg": "\u0421\u043e\u0434\u0430"}}, "en:snacks": {"wikidata": {"en": "Q749316"}, "name": {"ru": "\u0417\u0430\u043a\u0443\u0441\u043a\u0438", "pl": "Przek\u0105ski", "ca": "Snacks", "en": "Snacks", "it": "Snack", "ro": "Gust\u0103ri", "he": "\u05d7\u05d8\u05d9\u05e4\u05d9\u05dd", "tr": "At\u0131\u015ft\u0131rmal\u0131klar", "nl": "Snacks", "no": "Snacks", "lt": "U\u017ekand\u017eiai", "pt": "Lanches comida", "da": 
"Snacks", "sv": "Snacks", "zh": "\u96f6\u98df", "hr": "Grickalice", "bg": "\u0417\u0430\u043a\u0443\u0441\u043a\u0438", "es": "Botanas", "th": "\u0e02\u0e19\u0e21-\u0e02\u0e2d\u0e07\u0e27\u0e48\u0e32\u0e07", "fr": "Snacks", "fi": "Naposteltavat", "cs": "Sva\u010diny", "de": "Imbiss", "hu": "Snackek"}, "children": ["en:extruded-crispbreads", "en:popcorn", "en:salty-snacks", "en:snacks-variety-packs", "en:sweet-snacks"], "synonyms": {"ro": ["Gust\u0103ri"], "nl": ["Snacks"], "tr": ["At\u0131\u015ft\u0131rmal\u0131klar"], "he": ["\u05d7\u05d8\u05d9\u05e4\u05d9\u05dd"], "pl": ["Przek\u0105ski"], "ru": ["\u0417\u0430\u043a\u0443\u0441\u043a\u0438"], "it": ["Snack"], "en": ["Snacks"], "ca": ["Snacks"], "de": ["Imbiss", "Snack"], "cs": ["Sva\u010diny"], "fi": ["Naposteltavat"], "fr": ["Snacks"], "hu": ["Snackek"], "pt": ["Lanches comida"], "da": ["Snacks"], "lt": ["U\u017ekand\u017eiai"], "no": ["Snacks"], "es": ["Botanas", "Snacks"], "bg": ["\u0417\u0430\u043a\u0443\u0441\u043a\u0438"], "th": ["\u0e02\u0e19\u0e21-\u0e02\u0e2d\u0e07\u0e27\u0e48\u0e32\u0e07"], "hr": ["Grickalice"], "zh": ["\u96f6\u98df"], "sv": ["Snacks", "Tilltugg"]}}, "en:beverages": {"intake24_category_code": {"en": "DRNK"}, "synonyms": {"fr": ["Boissons"], "fi": ["Juomat"], "cs": ["N\u00e1poje"], "de": ["Getr\u00e4nke"], "hu": ["Italok"], "no": ["Drikkevarer"], "pt": ["Bebidas"], "lt": ["G\u0117rimai"], "da": ["Drikkevarer"], "sv": ["Drycker", "Dricka", "Drickor", "Dryck"], "zh": ["\u996e\u6599", "\u98f2\u54c1"], "hr": ["Pi\u0107a"], "ja": ["\u98f2\u6599"], "es": ["Bebidas"], "bg": ["\u041d\u0430\u043f\u0438\u0442\u043a\u0438"], "th": ["\u0e40\u0e04\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e14\u0e37\u0e48\u0e21"], "ro": ["B\u0103uturi", "Bauturi"], "he": ["\u05de\u05e9\u05e7\u05d0\u05d5\u05ea"], "se": ["Drycker", "dryck"], "tr": ["\u0130\u00e7ecekler"], "nl": ["Dranken", "Drank"], "ru": ["\u041d\u0430\u043f\u0438\u0442\u043a\u0438"], "pl": ["Napoje", "Nap\u00f3j"], "ca": ["Begudes"], "en": ["Beverages", 
"Drinks"], "it": ["Bevande"], "rs": ["Pi\u0107a"]}, "children": ["en:alcoholic-beverages", "en:artificially-sweetened-beverages", "en:basil-seeds-drinks", "en:beverage-variety-packs", "en:carbonated-drinks", "en:coffee-drinks", "en:dairy-drinks", "en:dehydrated-beverages", "en:energy-drinks", "en:fermented-drinks", "en:flavoured-drinks", "en:herbal-tea-beverages", "en:hot-beverages", "en:milsubstitutes", "en:mixed-drinks", "en:non-alcoholic-beverages", "en:plant-based-beverages", "en:sweetened-beverages", "en:tea-based-beverages", "en:unsweetened-beverages", "en:waters", "fr:citronnades"], "name": {"se": "Drycker", "tr": "\u0130\u00e7ecekler", "nl": "Dranken", "he": "\u05de\u05e9\u05e7\u05d0\u05d5\u05ea", "ro": "B\u0103uturi", "it": "Bevande", "rs": "Pi\u0107a", "ca": "Begudes", "en": "Beverages", "pl": "Napoje", "ru": "\u041d\u0430\u043f\u0438\u0442\u043a\u0438", "hu": "Italok", "cs": "N\u00e1poje", "de": "Getr\u00e4nke", "fr": "Boissons", "fi": "Juomat", "hr": "Pi\u0107a", "ja": "\u98f2\u6599", "bg": "\u041d\u0430\u043f\u0438\u0442\u043a\u0438", "es": "Bebidas", "th": "\u0e40\u0e04\u0e23\u0e37\u0e48\u0e2d\u0e07\u0e14\u0e37\u0e48\u0e21", "sv": "Drycker", "zh": "\u996e\u6599", "pt": "Bebidas", "lt": "G\u0117rimai", "da": "Drikkevarer", "no": "Drikkevarer"}, "parents": ["en:beverages-and-beverages-preparations"], "wikidata": {"en": "Q40050"}}, "en:granulated-sugars": {"synonyms": {"fr": ["Sucres blancs", "saccharose", "saccarose", "sucres de table", "sucroses", "sucre blanc"], "fi": ["jalostetut sokerit"], "pt": ["A\u00e7ucares granulados"], "de": ["Kristallzucker", "Wei\u00dfzucker", "wei\u00dfe Zucker"], "pl": ["Cukier bia\u0142y"], "zh": ["\u51b0\u7cd6"], "en": ["Granulated sugars", "White sugars", "Refined sugars", "White sugar", "Refined sugar"], "it": ["Zucchero bianco"], "hr": ["Granulirani \u0161e\u0107eri"], "nl": ["Kristalsuikers"], "es": ["Az\u00facar blanco"], "ja": ["\u30b0\u30e9\u30cb\u30e5\u30fc\u7cd6"]}, "wikidata": {"en": "Q15631442"}, 
"ciqual_food_code": {"en": "31016"}, "parents": ["en:sugars"], "ciqual_food_name": {"fr": "Sucre blanc", "en": "Sugar, white"}, "agribalyse_food_code": {"en": "31016"}, "name": {"fr": "Sucres blancs", "fi": "Jalostetut sokerit", "pt": "A\u00e7ucares granulados", "de": "Kristallzucker", "pl": "Cukier bia\u0142y", "zh": "\u51b0\u7cd6", "en": "Granulated sugars", "it": "Zucchero bianco", "hr": "Granulirani \u0161e\u0107eri", "nl": "Kristalsuikers", "ja": "\u30b0\u30e9\u30cb\u30e5\u30fc\u7cd6", "es": "Az\u00facar blanco"}}, "en:plant-based-foods": {"parents": ["en:plant-based-foods-and-beverages"], "name": {"ru": "\u0415\u0434\u0430 \u0438\u0437 \u0440\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0433\u043e \u0441\u044b\u0440\u044c\u044f", "pl": "\u017bywno\u015b\u0107 na bazie ro\u015blin", "en": "Plant-based foods", "ca": "Aliments amb base vegetal", "it": "Cibi a base vegetale", "ro": "Alimente pe baz\u0103 de plante", "he": "\u05de\u05d6\u05d5\u05e0\u05d5\u05ea \u05de\u05d4\u05e6\u05d5\u05de\u05d7", "tr": "Bitkisel yiyecekler", "nl": "Plantaardige levensmiddelen", "no": "Plantebasert mat", "da": "Plantebaserede f\u00f8devarer", "pt": "Alimentos \u00e0 base de plantas", "lt": "Augalin\u0117s kilm\u0117s maisto produktai", "zh": "\u690d\u7269\u6027\u98df\u7269", "sv": "V\u00e4xtbaserad mat", "th": "\u0e1c\u0e25\u0e34\u0e15\u0e20\u0e31\u0e13\u0e11\u0e4c\u0e08\u0e32\u0e01\u0e1e\u0e37\u0e0a\u0e17\u0e31\u0e49\u0e07\u0e2b\u0e21\u0e14", "bg": "\u0420\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u043d\u0438 \u0445\u0440\u0430\u043d\u0438", "es": "Alimentos de origen vegetal", "hr": "Hranu biljnog podrijetla", "fi": "Kasvipohjaiset ruoat", "fr": "Aliments d'origine v\u00e9g\u00e9tale", "de": "Pflanzliche Lebensmittel", "hu": "N\u00f6v\u00e9nyialap\u00fa \u00e9lelmiszerek"}, "children": ["en:bran", "en:canned-plant-based-foods", "en:cereals-and-potatoes", "en:cheese-substitutes", "en:coffees", "en:coriander-products", "en:culinary-plants", 
"en:dried-plant-based-foods", "en:egg-substitutes", "en:elderberry-products", "en:fennel", "en:fenugreek-products", "en:flakes", "en:flours", "en:freeze-dried-plant-based-foods", "en:fresh-chervil", "en:fresh-plant-based-foods", "en:fried-plant-based-foods", "en:frozen-plant-based-foods", "en:fruits-and-vegetables-based-foods", "en:fruits-nuts-seed-variety-packs", "en:herbs-spices-extracts-variety-packs", "en:legumes-and-their-products", "en:liquorice-roots", "en:nutmeg-tree-products", "en:nuts-and-their-products", "en:olive-tree-products", "en:pan-fried-vegetables", "en:pastas", "en:pickles", "en:plant-based-creams", "en:plant-based-meals", "en:plant-based-pickles", "en:plant-based-spreads", "en:potted-plants", "en:powdered-plant-milks", "en:prepared-vegetables", "en:psyllium", "en:puffed-grains", "en:pumpkin-and-squash-plant-products", "en:refrigerated-plant-based-foods", "en:seaweeds-and-their-products", "en:seeds", "en:spices", "en:sprouts", "en:starches", "en:stevia-and-their-products", "en:sunflower-seeds-and-their-products", "en:vegetable-broths", "en:vegetable-fats", "fr:cerfeuil-tubereux", "nl:seroendeng"], "synonyms": {"no": ["Plantebasert mat"], "pt": ["Alimentos \u00e0 base de plantas"], "lt": ["Augalin\u0117s kilm\u0117s maisto produktai"], "da": ["Plantebaserede f\u00f8devarer"], "sv": ["V\u00e4xtbaserad mat"], "zh": ["\u690d\u7269\u6027\u98df\u7269"], "hr": ["Hranu biljnog podrijetla"], "th": ["\u0e1c\u0e25\u0e34\u0e15\u0e20\u0e31\u0e13\u0e11\u0e4c\u0e08\u0e32\u0e01\u0e1e\u0e37\u0e0a\u0e17\u0e31\u0e49\u0e07\u0e2b\u0e21\u0e14"], "bg": ["\u0420\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u043d\u0438 \u0445\u0440\u0430\u043d\u0438"], "es": ["Alimentos de origen vegetal", "Alimentos vegetales", "Vegetales y derivados"], "fr": ["Aliments d'origine v\u00e9g\u00e9tale", "Aliments \u00e0 base de plantes", "V\u00e9g\u00e9taux et d\u00e9riv\u00e9s"], "fi": ["Kasvipohjaiset ruoat"], "de": ["Pflanzliche Lebensmittel"], "hu": ["N\u00f6v\u00e9nyialap\u00fa 
\u00e9lelmiszerek"], "ru": ["\u0415\u0434\u0430 \u0438\u0437 \u0440\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0433\u043e \u0441\u044b\u0440\u044c\u044f"], "pl": ["\u017bywno\u015b\u0107 na bazie ro\u015blin"], "ca": ["Aliments amb base vegetal"], "en": ["Plant-based foods"], "it": ["Cibi a base vegetale"], "ro": ["Alimente pe baz\u0103 de plante"], "he": ["\u05de\u05d6\u05d5\u05e0\u05d5\u05ea \u05de\u05d4\u05e6\u05d5\u05de\u05d7"], "tr": ["Bitkisel yiyecekler", "Bitki kaynakl\u0131 yiyecekler", "Bitki yiyecekleri"], "nl": ["Plantaardige levensmiddelen"]}}, "en:dairies": {"wikidata": {"en": "Q185217"}, "name": {"ms": "Produk tenusu", "hr": "Mlije\u010dni proizvodi", "gl": "Produtos l\u00e1cteos", "hi": "\u0926\u0941\u0917\u094d\u0927-\u0909\u0924\u094d\u092a\u093e\u0926", "be": "\u041c\u0430\u043b\u043e\u0447\u043d\u044b\u044f \u043f\u0440\u0430\u0434\u0443\u043a\u0442\u044b", "bg": "\u041c\u043b\u0435\u0447\u043d\u0438 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u0438", "et": "Piimatoode", "th": "\u0e1c\u0e25\u0e34\u0e15\u0e20\u0e31\u0e13\u0e11\u0e4c\u0e08\u0e32\u0e01\u0e19\u0e21", "sr": "\u041c\u043b\u0435\u0447\u043d\u0438 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438", "zh": "\u4e73\u88fd\u54c1", "si": "\u0d9c\u0dc0 \u0d9a\u0dd2\u0dbb\u0dd2 \u0d86\u0dc1\u0dca\u200d\u0dbb\u0dd2\u0dad \u0db1\u0dd2\u0dc2\u0dca\u0db4\u0dcf\u0daf\u0db1", "pt": "Latic\u00ednios", "lt": "Pieno produktai", "da": "M\u00e6lkeprodukter", "bn": "\u09a6\u09c1\u0997\u09cd\u09a7\u099c\u09be\u09a4 \u09aa\u09a3\u09cd\u09af", "eu": "Esneki", "kn": "\u0c95\u0ccd\u0cb7\u0cc0\u0cb0\u0ccb\u0ca4\u0ccd\u0caa\u0ca8\u0ccd\u0ca8", "kk": "\u0421\u04af\u0442 \u0442\u0430\u0493\u0430\u043c\u0434\u0430\u0440\u044b", "uz": "Sut mahsulotlari", "lv": "Piena produkti", "ta": "\u0baa\u0bbe\u0bb2\u0bcd \u0baa\u0bca\u0bb0\u0bc1\u0bb3\u0bcd", "id": "Produk susu", "it": "Latticini", "mn": "\u0421\u04af\u04af\u043d \u0431\u04af\u0442\u044d\u044d\u0433\u0434\u044d\u0445\u04af\u04af\u043d", 
"ca": "Productes l\u00e0ctics", "af": "Suiwelproduk", "ko": "\uc720\uc81c\ud488", "hy": "\u053f\u0561\u0569\u0576\u0561\u0574\u0569\u0565\u0580\u0584", "bs": "Mlije\u010dni proizvodi", "ru": "\u041c\u043e\u043b\u043e\u0447\u043d\u044b\u0435 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u044b", "ar": "\u0645\u0646\u062a\u062c\u0627\u062a`\u0627\u0644\u0623\u0644\u0628\u0627\u0646", "pa": "\u0a21\u0a47\u0a05\u0a30\u0a40 \u0a09\u0a24\u0a2a\u0a3e\u0a26", "he": "\u05de\u05d5\u05e6\u05e8\u05d9 \u05d7\u05dc\u05d1", "br": "Boued-laezh", "fa": "\u0641\u0631\u0627\u0648\u0631\u062f\u0647 \u0644\u0628\u0646\u06cc", "sl": "Mle\u010dni izdelek", "vi": "Ch\u1ebf ph\u1ea9m s\u1eefa", "es": "L\u00e1cteos", "ja": "\u4e73\u88fd\u54c1", "ur": "\u0688\u06cc\u0631\u06cc \u0645\u0635\u0646\u0648\u0639\u0627\u062a", "sv": "Mejeriprodukt", "yi": "\u05de\u05d9\u05dc\u05db\u05d9\u05d2\u05e1", "no": "Meieriprodukter", "hu": "Tejterm\u00e9kek", "cs": "Ml\u00e9\u010dn\u00e9 v\u00fdrobky", "de": "Milchprodukte", "fr": "Produits laitiers", "eo": "Laktoprodukto", "fi": "Maitotuotteet", "az": "S\u00fcd m\u0259hsullar\u0131", "oc": "Produch lachi\u00e8r", "is": "Mj\u00f3lkurafur\u00f0", "el": "\u0393\u03b1\u03bb\u03b1\u03ba\u03c4\u03bf\u03ba\u03bf\u03bc\u03b9\u03ba\u03ac \u03c0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1", "en": "Dairies", "pl": "Nabia\u0142", "cy": "Cynnyrch llaeth", "fy": "Suvel", "nl": "Zuivelproducten", "tr": "Mand\u0131ra \u00fcr\u00fcn\u00fc", "ro": "Produse lactate", "uk": "\u041c\u043e\u043b\u043e\u0447\u043d\u0456 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u0438"}, "incompatible_with": {"en": "categories:en:dairy-substitutes"}, "synonyms": {"lt": ["Pieno produktai", "Pieno produktas"], "pt": ["latic\u00ednios", "lactic\u00ednios"], "da": ["M\u00e6lkeprodukter", "Mejeriprodukter"], "bn": ["\u09a6\u09c1\u0997\u09cd\u09a7\u099c\u09be\u09a4 \u09aa\u09a3\u09cd\u09af"], "eu": ["Esneki"], "ms": ["Produk tenusu"], "hr": ["mlije\u010dni proizvodi"], "gl": ["Produtos l\u00e1cteos"], 
"hi": ["\u0926\u0941\u0917\u094d\u0927-\u0909\u0924\u094d\u092a\u093e\u0926"], "bg": ["\u041c\u043b\u0435\u0447\u043d\u0438 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u0438", "\u041c\u043b\u0435\u0447\u0435\u043d \u043f\u0440\u043e\u0434\u0443\u043a\u0442"], "be": ["\u041c\u0430\u043b\u043e\u0447\u043d\u044b\u044f \u043f\u0440\u0430\u0434\u0443\u043a\u0442\u044b"], "et": ["Piimatoode"], "th": ["\u0e1c\u0e25\u0e34\u0e15\u0e20\u0e31\u0e13\u0e11\u0e4c\u0e08\u0e32\u0e01\u0e19\u0e21", "\u0e1c\u0e25\u0e34\u0e15\u0e20\u0e31\u0e13\u0e11\u0e4c\u0e19\u0e21"], "sr": ["\u041c\u043b\u0435\u0447\u043d\u0438 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438"], "si": ["\u0d9c\u0dc0 \u0d9a\u0dd2\u0dbb\u0dd2 \u0d86\u0dc1\u0dca\u200d\u0dbb\u0dd2\u0dad \u0db1\u0dd2\u0dc2\u0dca\u0db4\u0dcf\u0daf\u0db1"], "zh": ["\u4e73\u88fd\u54c1", "\u5976\u5236\u54c1"], "id": ["Produk susu"], "kn": ["\u0c95\u0ccd\u0cb7\u0cc0\u0cb0\u0ccb\u0ca4\u0ccd\u0caa\u0ca8\u0ccd\u0ca8"], "kk": ["\u0421\u04af\u0442 \u0442\u0430\u0493\u0430\u043c\u0434\u0430\u0440\u044b"], "uz": ["Sut mahsulotlari"], "lv": ["Piena produkti"], "ta": ["\u0baa\u0bbe\u0bb2\u0bcd \u0baa\u0bca\u0bb0\u0bc1\u0bb3\u0bcd"], "ko": ["\uc720\uc81c\ud488"], "hy": ["\u053f\u0561\u0569\u0576\u0561\u0574\u0569\u0565\u0580\u0584"], "bs": ["Mlije\u010dni proizvodi"], "ru": ["\u041c\u043e\u043b\u043e\u0447\u043d\u044b\u0435 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u044b"], "it": ["Latticini", "prodotti caseari"], "ca": ["Productes l\u00e0ctics", "L\u00e0ctic"], "mn": ["\u0421\u04af\u04af\u043d \u0431\u04af\u0442\u044d\u044d\u0433\u0434\u044d\u0445\u04af\u04af\u043d"], "af": ["Suiwelproduk"], "fa": ["\u0641\u0631\u0627\u0648\u0631\u062f\u0647 \u0644\u0628\u0646\u06cc"], "sl": ["Mle\u010dni izdelek"], "ar": ["\u0645\u0646\u062a\u062c\u0627\u062a`\u0627\u0644\u0623\u0644\u0628\u0627\u0646"], "pa": ["\u0a21\u0a47\u0a05\u0a30\u0a40 \u0a09\u0a24\u0a2a\u0a3e\u0a26"], "he": ["\u05de\u05d5\u05e6\u05e8\u05d9 \u05d7\u05dc\u05d1", 
"\u05de\u05b7\u05d7\u05dc\u05b8\u05d1\u05b8\u05d4"], "br": ["Boued-laezh"], "no": ["Meieriprodukter", "Melkeprodukter"], "es": ["L\u00e1cteos", "Productos l\u00e1cteos", "L\u00e1cteo"], "vi": ["Ch\u1ebf ph\u1ea9m s\u1eefa"], "ja": ["\u4e73\u88fd\u54c1"], "ur": ["\u0688\u06cc\u0631\u06cc \u0645\u0635\u0646\u0648\u0639\u0627\u062a"], "sv": ["Mejeriprodukt", "Mejeriprodukter", "Mejeriprodukt", "Mj\u00f6lkprodukter"], "yi": ["\u05de\u05d9\u05dc\u05db\u05d9\u05d2\u05e1"], "cs": ["Ml\u00e9\u010dn\u00e9 v\u00fdrobky", "Ml\u00e9\u010dn\u00fd v\u00fdrobek"], "de": ["Milchprodukte", "Milcherzeugnis"], "fr": ["Produits laitiers", "produit laitier", "laitage", "laitages", "cr\u00e8merie", "cr\u00e8meries"], "eo": ["Laktoprodukto"], "fi": ["Maitotuotteet", "Maitotuote"], "hu": ["Tejterm\u00e9kek", "Tejterm\u00e9k"], "pl": ["Nabia\u0142", "Produkty mleczne"], "cy": ["Cynnyrch llaeth"], "az": ["S\u00fcd m\u0259hsullar\u0131"], "oc": ["Produch lachi\u00e8r"], "is": ["Mj\u00f3lkurafur\u00f0"], "el": ["\u0393\u03b1\u03bb\u03b1\u03ba\u03c4\u03bf\u03ba\u03bf\u03bc\u03b9\u03ba\u03ac \u03c0\u03c1\u03bf\u03ca\u03cc\u03bd\u03c4\u03b1"], "en": ["Dairies", "Dairy", "Milk products", "Dairy products"], "ro": ["Produse lactate", "lactate"], "uk": ["\u041c\u043e\u043b\u043e\u0447\u043d\u0456 \u043f\u0440\u043e\u0434\u0443\u043a\u0442\u0438"], "fy": ["Suvel"], "nl": ["Zuivelproducten", "Melkproducten", "Zuivelproduct", "Zuivel"], "tr": ["Mand\u0131ra \u00fcr\u00fcn\u00fc"]}, "children": ["en:compound-dairy-creams", "en:condensed-milks", "en:creams", "en:custards-and-pastry-creams", "en:dairy-by-products", "en:dairy-desserts", "en:dairy-drinks", "en:dairy-variety-packs", "en:eggnog-without-alcohol", "en:evaporated-milks", "en:fermented-milk-products", "en:light-custard-cream-with-vanilla", "en:milkfat", "en:milks-liquid-and-powder", "en:whey-powder", "nl:opschuimmelken"]}, "en:dried-vegetables": {"name": {"en": "Dried vegetables", "es": "Verduras y hortalizas deshidratadas", "nl": "Gedroogde 
groenten", "ja": "\u5e72\u3057\u30c8\u30de\u30c8", "bg": "\u0421\u0443\u0448\u0435\u043d\u0438 \u0437\u0435\u043b\u0435\u043d\u0447\u0443\u0446\u0438", "it": "Ortaggi essiccati", "hr": "Su\u0161eno povr\u0107e", "fi": "Kuivatut vihannekset", "fr": "L\u00e9gumes s\u00e9ch\u00e9s", "pl": "Warzywa suszone", "de": "Getrocknetes Gem\u00fcse"}, "parents": ["en:dried-plant-based-foods", "en:vegetables-based-foods"], "who_id": {"en": "16"}, "children": ["en:dried-tomatoes"], "synonyms": {"en": ["Dried vegetables"], "nl": ["Gedroogde groenten"], "bg": ["\u0421\u0443\u0448\u0435\u043d\u0438 \u0437\u0435\u043b\u0435\u043d\u0447\u0443\u0446\u0438"], "ja": ["\u5e72\u3057\u30c8\u30de\u30c8", "\u30c9\u30e9\u30a4\u30c8\u30de\u30c8"], "es": ["Verduras y hortalizas deshidratadas", "Verduras y hortalizas desecadas", "Verduras deshidratadas", "Verduras desecadas", "Hortalizas deshidratadas", "Hortalizas desecadas"], "hr": ["Su\u0161eno povr\u0107e"], "it": ["Ortaggi essiccati"], "fi": ["kuivatut vihannekset"], "fr": ["L\u00e9gumes s\u00e9ch\u00e9s"], "de": ["Getrocknetes Gem\u00fcse"], "pl": ["Warzywa suszone"]}}, "en:coffee-creamy-puddings": {"name": {"lt": "Kavos kremo desertai", "hr": "Kremasti puding od kave", "nl": "Koffie vla's", "fr": "Cr\u00e8mes dessert caf\u00e9", "en": "Coffee creamy puddings"}, "synonyms": {"nl": ["Koffie vla's", "Koffievla's"], "lt": ["Kavos kremo desertai"], "hr": ["Kremasti puding od kave"], "en": ["Coffee creamy puddings", "coffee puddings", "coffee custard dessert"], "fr": ["Cr\u00e8mes dessert caf\u00e9"]}, "children": ["en:refrigerated-coffee-puddings"], "agribalyse_proxy_food_code": {"en": "39246"}, "agribalyse_proxy_food_name": {"en": "Custard dessert, coffee, refrigerated", "fr": "Cr\u00e8me dessert au caf\u00e9, rayon frais"}, "parents": ["en:creamy-puddings"]}, "en:dried-tomatoes": {"parents": ["en:dried-vegetables", "en:tomatoes-and-their-products"], "ciqual_food_code": {"en": "20189"}, "synonyms": {"en": ["Dried tomatoes", "Sun-Dried 
tomatoes", "Dried tomato", "Sun-Dried tomato"], "hr": ["Su\u0161ene raj\u010dice"], "it": ["Pomodori essiccati"], "es": ["Tomates secos", "Tomates deshidratados", "Tomates desecados"], "nl": ["Gedroogde tomaten"], "bg": ["\u0421\u0443\u0448\u0435\u043d\u0438 \u0434\u043e\u043c\u0430\u0442\u0438"], "fr": ["Tomates s\u00e9ch\u00e9es", "Tomate s\u00e9ch\u00e9e"], "fi": ["kuivatut tomaatit"], "pl": ["Pomidory suszone"], "de": ["Getrocknete Tomaten"]}, "children": ["en:dried-tomato-in-oil", "en:natural-dried-tomatoes"], "name": {"de": "Getrocknete Tomaten", "pl": "Pomidory suszone", "fr": "Tomates s\u00e9ch\u00e9es", "fi": "Kuivatut tomaatit", "hr": "Su\u0161ene raj\u010dice", "it": "Pomodori essiccati", "es": "Tomates secos", "nl": "Gedroogde tomaten", "bg": "\u0421\u0443\u0448\u0435\u043d\u0438 \u0434\u043e\u043c\u0430\u0442\u0438", "en": "Dried tomatoes"}, "agribalyse_food_code": {"en": "20189"}, "ciqual_food_name": {"en": "Tomato, dried", "fr": "Tomate, s\u00e9ch\u00e9e"}}, "en:biscuits": {"wikipedia": {"en": "https://en.wikipedia.org/wiki/Biscuit"}, "ciqual_proxy_food_name": {"en": "Biscuit -cookie-", "fr": "Biscuit sec, sans pr\u00e9cision"}, "children": ["en:almond-cookies", "en:amaretti", "en:assortments-of-biscuits", "en:baci-di-dama", "en:biscuit-cookie-snack-w-chocolate-filling", "en:biscuit-cookie-vitamins-and-chemical-elements-content-guaranteed", "en:biscuit-cookie-vitamins-content-guaranteed", "en:biscuit-with-milk", "en:biscuits-cookies-shelf-stable", "en:biscuits-or-cookies-variety-packs", "en:canestrelli", "en:cat-tongue", "en:chocolate-biscuits", "en:cigarette-russes", "en:crepes-dentelle", "en:crispy-biscuits", "en:drop-cookies", "en:dry-biscuits", "en:filled-biscuits", "en:flaky-biscuits", "en:gluten-free-biscuits", "en:krumiri", "en:neules", "en:oatmeal-cookies", "en:petit-beurre", "en:ricciarelli", "en:rousquilles", "en:shortbread-biscuit-with-butter", "en:shortbread-cookies", "en:speculaas", "en:speculoos", 
"en:sponge-cake-biscuits-with-fruits-covering", "en:sprits-biscuits", "en:strawberry-biscuits", "en:tartlet-biscuits-with-chocolate", "en:tartlet-biscuits-with-fruit-preparation", "en:toaster-pastries", "en:torcetti", "en:wafers", "es:galletas-maria", "fr:barquettes", "fr:biscuits-a-la-myrtille", "fr:biscuits-edulcores", "fr:biscuits-roses-de-reims", "fr:tartelettes-au-caramel", "it:paste-di-meliga"], "synonyms": {"it": ["Biscotti", "biscotto"], "ca": ["Galetes", "galeta"], "mk": ["\u0431\u0438\u0441\u043a\u0432\u0438\u0442"], "ko": ["\ube44\uc2a4\ud0b7"], "hy": ["\u0562\u056b\u057d\u056f\u057e\u056b\u057f"], "ru": ["\u0431\u0438\u0441\u043a\u0432\u0438\u0442"], "ar": ["\u0628\u0633\u0643\u0648\u064a\u062a"], "he": ["\u05d1\u05d9\u05e1\u05e7\u05d5\u05d5\u05d9\u05d8"], "br": ["Gwispid"], "fa": ["\u0628\u06cc\u0633\u06a9\u0648\u06cc\u062a"], "gl": ["Galleta"], "ms": ["Biskut"], "hr": ["Keksi", "Keks", "biskviti", "biskvit"], "th": ["\u0e1a\u0e34\u0e2a\u0e01\u0e34\u0e15"], "so": ["Buskud"], "bg": ["\u0411\u0438\u0441\u043a\u0432\u0438\u0442\u0438"], "sr": ["\u0431\u0438\u0441\u043a\u0432\u0438\u0442"], "nb": ["kjeks"], "zh": ["\u9762\u997c"], "si": ["\u0dc0\u0dd2\u0dc3\u0dca\u0d9a\u0ddc\u0da7\u0da7\u0dd4\u0dc4\u0dca"], "pt": ["biscoitos", "Biscoito"], "lt": ["Sausainiai", "Sausainis"], "da": ["kiks"], "eu": ["Gaileta"], "wa": ["Bisc\u00fbte"], "ta": ["\u0bae\u0bbe\u0b9a\u0bcd\u0b9a\u0bbf\u0bb2\u0bcd\u0bb2\u0bc1"], "id": ["Biskuit"], "is": ["Kex"], "el": ["\u039c\u03c0\u03b9\u03c3\u03ba\u03cc\u03c4\u03bf"], "az": ["Biskvit"], "oc": ["biscu\u00e8it"], "en": ["Biscuits", "biscuit", "cookies", "cookie"], "pl": ["herbatnik"], "cy": ["Bisged"], "nl": ["Koekjes", "biscuit", "biscuits"], "tr": ["Bisk\u00fcvi"], "nn": ["Kjeks"], "ku": ["Hindok"], "io": ["Bisquito"], "uk": ["\u0411\u0456\u0441\u043a\u0432\u0456\u0442"], "ro": ["Biscui\u021bi", "Biscuit", "Fursecuri", "biscuiti"], "ur": ["\u0628\u0633\u06a9\u0679"], "es": ["Galletas", "galleta"], "vi": ["B\u00edch quy"], "ja": 
["\u30d3\u30b9\u30b1\u30c3\u30c8"], "sv": ["Kex"], "tl": ["Biskwit"], "hu": ["Kekszek", "Keksz"], "cs": ["Su\u0161enky"], "gd": ["Briosgaid"], "de": ["Kekse", "Biskuit"], "fr": ["biscuits", "biscuit", "biscuits sucr\u00e9s", "biscuit sucr\u00e9"], "ga": ["Briosca"], "eo": ["biskvito"], "fi": ["Keksit"]}, "carbon_footprint_fr_foodges_ingredient": {"fr": "Biscuit au beurre"}, "wikidata": {"en": "Q13270"}, "agribalyse_proxy_food_code": {"en": "24000"}, "parents": ["en:biscuits-and-cakes", "en:biscuits-and-crackers"], "ciqual_proxy_food_code": {"en": "24000"}, "name": {"ko": "\ube44\uc2a4\ud0b7", "hy": "\u0532\u056b\u057d\u056f\u057e\u056b\u057f", "ru": "\u0411\u0438\u0441\u043a\u0432\u0438\u0442", "it": "Biscotti", "ca": "Galetes", "mk": "\u0411\u0438\u0441\u043a\u0432\u0438\u0442", "fa": "\u0628\u06cc\u0633\u06a9\u0648\u06cc\u062a", "ar": "\u0628\u0633\u0643\u0648\u064a\u062a", "he": "\u05d1\u05d9\u05e1\u05e7\u05d5\u05d5\u05d9\u05d8", "br": "Gwispid", "pt": "Biscoitos", "da": "Kiks", "lt": "Sausainiai", "eu": "Gaileta", "gl": "Galleta", "ms": "Biskut", "hr": "Keksi", "th": "\u0e1a\u0e34\u0e2a\u0e01\u0e34\u0e15", "bg": "\u0411\u0438\u0441\u043a\u0432\u0438\u0442\u0438", "so": "Buskud", "sr": "\u0411\u0438\u0441\u043a\u0432\u0438\u0442", "nb": "Kjeks", "si": "\u0dc0\u0dd2\u0dc3\u0dca\u0d9a\u0ddc\u0da7\u0da7\u0dd4\u0dc4\u0dca", "zh": "\u9762\u997c", "id": "Biskuit", "wa": "Bisc\u00fbte", "ta": "\u0bae\u0bbe\u0b9a\u0bcd\u0b9a\u0bbf\u0bb2\u0bcd\u0bb2\u0bc1", "pl": "Herbatnik", "cy": "Bisged", "el": "\u039c\u03c0\u03b9\u03c3\u03ba\u03cc\u03c4\u03bf", "is": "Kex", "oc": "Biscu\u00e8it", "az": "Biskvit", "en": "Biscuits", "ku": "Hindok", "io": "Bisquito", "uk": "\u0411\u0456\u0441\u043a\u0432\u0456\u0442", "ro": "Biscui\u021bi", "nl": "Koekjes", "tr": "Bisk\u00fcvi", "nn": "Kjeks", "tl": "Biskwit", "ur": "\u0628\u0633\u06a9\u0679", "vi": "B\u00edch quy", "es": "Galletas", "ja": "\u30d3\u30b9\u30b1\u30c3\u30c8", "sv": "Kex", "cs": "Su\u0161enky", "gd": "Briosgaid", "de": 
"Kekse", "fr": "Biscuits", "ga": "Briosca", "fi": "Keksit", "eo": "Biskvito", "hu": "Kekszek"}}, "en:sweet-snacks": {"nova": {"en": "3"}, "parents": ["en:snacks"], "name": {"ru": "\u0421\u043b\u0430\u0434\u043a\u0438\u0435 \u0437\u0430\u043a\u0443\u0441\u043a\u0438", "pt": "Lanches doces", "da": "S\u00f8de snacks", "lt": "Sald\u016bs u\u017ekand\u017eiai", "pl": "S\u0142odkie przek\u0105ski", "ca": "Aperitius dol\u00e7os", "sv": "S\u00f6ta snacks", "en": "Sweet snacks", "zh": "\u542b\u7cd6\u96f6\u98df", "it": "Snack dolci", "hr": "Slatki zalogaji", "es": "Snacks dulces", "bg": "\u0421\u043b\u0430\u0434\u043a\u0438 \u0437\u0430\u043a\u0443\u0441\u043a\u0438", "fr": "Snacks sucr\u00e9s", "fi": "Makeat naposteltavat", "ro": "Gust\u0103ri dulci", "cs": "Sladk\u00e9 sva\u010diny", "de": "S\u00fc\u00dfer Snack", "he": "\u05d7\u05d8\u05d9\u05e4\u05d9\u05dd \u05de\u05ea\u05d5\u05e7\u05d9\u05dd", "hu": "Cukros \u00e9telek", "nl": "Zoete snacks"}, "incompatible_with": {"en": "categories:en:salty-snacks"}, "synonyms": {"pl": ["S\u0142odkie przek\u0105ski"], "pt": ["Lanches doces", "lanches a\u00e7ucarados"], "lt": ["Sald\u016bs u\u017ekand\u017eiai", "U\u017ekand\u017eiai su cukrumi"], "da": ["S\u00f8de snacks"], "ru": ["\u0421\u043b\u0430\u0434\u043a\u0438\u0435 \u0437\u0430\u043a\u0443\u0441\u043a\u0438"], "es": ["Snacks dulces"], "bg": ["\u0421\u043b\u0430\u0434\u043a\u0438 \u0437\u0430\u043a\u0443\u0441\u043a\u0438"], "hr": ["Slatki zalogaji"], "it": ["Snack dolci"], "en": ["Sweet snacks", "Sugary snacks"], "zh": ["\u542b\u7cd6\u96f6\u98df"], "ca": ["Aperitius dol\u00e7os"], "sv": ["S\u00f6ta snacks", "S\u00f6ta tilltugg"], "de": ["S\u00fc\u00dfer Snack"], "cs": ["Sladk\u00e9 sva\u010diny"], "ro": ["Gust\u0103ri dulci"], "fi": ["Makeat naposteltavat"], "fr": ["Snacks sucr\u00e9s", "sucreries"], "nl": ["Zoete snacks"], "hu": ["Cukros \u00e9telek"], "he": ["\u05d7\u05d8\u05d9\u05e4\u05d9\u05dd \u05de\u05ea\u05d5\u05e7\u05d9\u05dd", "\u05d7\u05d8\u05d9\u05e4\u05d9\u05dd 
\u05de\u05de\u05d5\u05ea\u05e7\u05d9\u05dd"]}, "children": ["en:bars", "en:biscuits-and-cakes", "en:chaat", "en:chocolates", "en:confectioneries", "en:popcorn-with-caramel", "en:sugared-popcorn", "en:sweet-fritters", "en:sweet-pastries-and-pies"]}, "en:sweeteners": {"name": {"en": "Sweeteners", "zh": "\u7cd6\u7cbe", "sv": "S\u00f6tningsmedel", "ca": "Edulcorants", "ja": "\u7518\u5473\u6599", "es": "Endulzantes", "bg": "\u041f\u043e\u0434\u0441\u043b\u0430\u0434\u0438\u0442\u0435\u043b\u0438", "th": "\u0e43\u0e0a\u0e49\u0e19\u0e49\u0e33\u0e15\u0e32\u0e25\u0e40\u0e17\u0e35\u0e22\u0e21", "it": "Edulcoranti", "hr": "Sladila", "ru": "\u041f\u043e\u0434\u0441\u043b\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u0438 \u0438 \u0437\u0430\u043c\u0435\u043d\u0438\u0442\u0435\u043b\u0438 \u0441\u0430\u0445\u0430\u0440\u0430", "pl": "Produkty wykorzystywane do s\u0142odzenia", "pt": "Ado\u00e7antes", "da": "S\u00f8demidler", "hu": "\u00c9des\u00edt\u0151szerek", "he": "\u05de\u05de\u05ea\u05d9\u05e7\u05d9\u05dd", "tr": "Tatland\u0131r\u0131c\u0131", "nl": "Zoetstoffen", "fi": "Makeutusaineet", "fr": "\u00c9dulcorants", "de": "S\u00fc\u00dfstoffe"}, "gpc_category_description": {"en": "Definition: Includes any products that can be described/observed as a sweet substance, obtained from the different stages of refining the juice of the sugar cane/sugar beet/molasses, or an artificial sweet substance that is specifically labelled and marketed to replace natural sugar, which is used as a sweetener and preservative of food and drinks. Products include white sugars such as castor, cubes, loaf, granulated, icing and preserving, coloured sugars such as brown, barbados, demerara and artificial sweeteners, such as saccharin, sorbitol and xylitol. 
Definition Excludes: Excludes products such as Syrups, Treacle and Molasses, Sugar Candy and Sugar Candy Substitute Confectionery."}, "gpc_category_name": {"en": "Sugar/Sugar Substitutes -Shelf Stable-"}, "wikidata": {"en": "Q4368298"}, "synonyms": {"he": ["\u05de\u05de\u05ea\u05d9\u05e7\u05d9\u05dd"], "hu": ["\u00c9des\u00edt\u0151szerek"], "nl": ["Zoetstoffen"], "tr": ["Tatland\u0131r\u0131c\u0131", "Tatland\u0131r\u0131c\u0131lar"], "fr": ["\u00c9dulcorants"], "fi": ["makeutusaineet"], "de": ["S\u00fc\u00dfstoffe"], "ca": ["Edulcorants"], "sv": ["S\u00f6tningsmedel"], "zh": ["\u7cd6\u7cbe"], "en": ["Sweeteners"], "it": ["Edulcoranti", "dolcificanti"], "hr": ["Sladila"], "th": ["\u0e43\u0e0a\u0e49\u0e19\u0e49\u0e33\u0e15\u0e32\u0e25\u0e40\u0e17\u0e35\u0e22\u0e21"], "bg": ["\u041f\u043e\u0434\u0441\u043b\u0430\u0434\u0438\u0442\u0435\u043b\u0438"], "ja": ["\u7518\u5473\u6599"], "es": ["Endulzantes", "Edulcorantes"], "ru": ["\u041f\u043e\u0434\u0441\u043b\u0430\u0441\u0442\u0438\u0442\u0435\u043b\u0438 \u0438 \u0437\u0430\u043c\u0435\u043d\u0438\u0442\u0435\u043b\u0438 \u0441\u0430\u0445\u0430\u0440\u0430"], "da": ["S\u00f8demidler"], "pt": ["Ado\u00e7antes"], "pl": ["Produkty wykorzystywane do s\u0142odzenia", "s\u0142odz\u0105ce"]}, "children": ["en:honey-based-preparations", "en:honeys", "en:simple-syrups", "en:sugar-substitutes", "en:sugars", "en:sweetener-variety-packs"], "ignore_energy_calculated_error": {"en": "yes"}, "gpc_category_code": {"en": "10000043"}}, "en:entremets-mousses-and-creamy-puddings": {"synonyms": {"fr": ["Entremets - mousses et cr\u00e8me desserts"], "en": ["Entremets - mousses and creamy puddings"]}, "children": ["en:creamy-puddings", "en:dairy-mousses"], "intake24_category_code": {"fr": "MOUS"}, "name": {"fr": "Entremets - mousses et cr\u00e8me desserts", "en": "Entremets - mousses and creamy puddings"}, "parents": ["en:dairy-desserts"]}, "en:beverages-and-beverages-preparations": {"synonyms": {"hr": ["Pi\u0107a i pripravci za 
pi\u0107e"], "it": ["Bevande e preparati per bevande"], "es": ["Bebidas y preparaciones de bebidas"], "nl": ["Dranken en drankbereidingen"], "en": ["Beverages and beverages preparations", "Beverages and preparations"], "de": ["Getr\u00e4nke und Getr\u00e4nkezubereitungen"], "fr": ["Boissons et pr\u00e9parations de boissons"]}, "children": ["en:beverage-preparations", "en:beverages"], "incompatible_with": {"en": "categories:en:meals"}, "description": {"en": "This mixes ready to drink beverages and not-ready to drink beverages"}, "name": {"de": "Getr\u00e4nke und Getr\u00e4nkezubereitungen", "fr": "Boissons et pr\u00e9parations de boissons", "hr": "Pi\u0107a i pripravci za pi\u0107e", "it": "Bevande e preparati per bevande", "es": "Bebidas y preparaciones de bebidas", "nl": "Dranken en drankbereidingen", "en": "Beverages and beverages preparations"}}, "en:biscuits-and-crackers": {"children": ["en:biscuits", "en:crackers"], "synonyms": {"en": ["Biscuits and crackers"], "fr": ["Biscuits sucr\u00e9s & biscuits ap\u00e9ritifs"]}, "intake24_category_code": {"en": "BSCT"}, "name": {"fr": "Biscuits sucr\u00e9s & biscuits ap\u00e9ritifs", "en": "Biscuits and crackers"}}} \ No newline at end of file diff --git a/tests/int/data/test_labels.full.json b/tests/int/data/test_labels.full.json new file mode 100644 index 00000000..eac3bf0e --- /dev/null +++ b/tests/int/data/test_labels.full.json @@ -0,0 +1 @@ +{"en:fair-trade": {"opposite": {"en": "en:non-fair-trade"}, "description": {"fr": "Le commerce \u00e9quitable est un syst\u00e8me d'\u00e9change dont l'objectif est de proposer une plus grande \u00e9quit\u00e9 dans le commerce conventionnel, voire une alternative \u00e0 celui-ci, bas\u00e9e notamment sur la r\u00e9appropriation des \u00e9changes marchands par ceux qui les pratiquent.", "en": "Fair trade is an arrangement designed to help producers in developing countries achieve sustainable and equitable trade relationships. 
Members of the fair trade movement add the payment of higher prices to exporters, as well as improved social and environmental standards.", "hu": "A m\u00e9lt\u00e1nyos kereskedelem olyan ir\u00e1ny\u00edtott kereskedelem, amely hangs\u00falyt helyez arra, hogy a harmadik vil\u00e1gbeli termel\u0151 megkapja az \u0151t megillet\u0151 p\u00e9nzt a term\u00e9ny\u00e9\u00e9rt.", "pt": "O com\u00e9rcio justo \u00e9 um acordo concebido para ajudar os produtores em pa\u00edses em desenvolvimento a alcan\u00e7arem rela\u00e7\u00f5es comerciais sustent\u00e1veis e justas.", "es": "El comercio justo es una forma alternativa de comercio promovida por varias ONG -organizaciones no gubernamentales-, por la Organizaci\u00f3n de las Naciones Unidas y por los movimientos sociales y pol\u00edticos -como el pacifismo y el ecologismo- que promueven una relaci\u00f3n comercial voluntaria y justa entre productores y consumidores.", "de": "Als Fairer Handel -englisch fair trade- wird ein kontrollierter Handel bezeichnet, bei dem die Erzeuger f\u00fcr ihre Produkte einen Mindestpreis erhalten, der von einer Fair-Trade-Organisation bestimmt wird. Damit soll den Produzenten auch bei niedrigeren Marktpreisen ein h\u00f6heres und verl\u00e4sslicheres Einkommen als im herk\u00f6mmlichen Handel erm\u00f6glicht werden.", "nl": "Eerlijke handel is internationale handel die gericht is op duurzame ontwikkeling in ontwikkelingslanden, met name bij de export van zulke landen naar rijkere westerse landen. 
Fair trade duidt op een streven om boeren voor hun exportproducten een prijs te geven die in verhouding staat tot de werkelijke productiekosten, en niet een prijs die wordt bepaald door de verhoudingen op de internationale markt."}, "synonyms": {"pl": ["Fair trade"], "fi": ["Reilu kauppa"], "pt": ["Com\u00e9rcio justo", "fair trade"], "de": ["Fairer Handel"], "bg": ["\u0421\u043f\u0440\u0430\u0432\u0435\u0434\u043b\u0438\u0432\u0430 \u0442\u044a\u0440\u0433\u043e\u0432\u0438\u044f"], "xx": ["Fair trade"], "cs": ["Fair trade"], "it": ["Commercio equo"], "he": ["\u05e1\u05d7\u05e8 \u05d4\u05d5\u05d2\u05df"], "es": ["Comercio justo", "Fairtrade-Comercio Justo"], "ca": ["Comer\u00e7 Just"], "nl": ["Fairtrade", "Rechtvaardige handel", "fair trade"], "fr": ["Commerce \u00e9quitable", "\u00e9quitable", "issu du commerce \u00e9quitable", "issus du commerce \u00e9quitable", "issue du commerce \u00e9quitable", "issues du commerce \u00e9quitable", "ingr\u00e9dients issus du commerce \u00e9quitable", "produits issus du commerce \u00e9quitable", "ingr\u00e9dient issu du commerce \u00e9quitable", "Ingr\u00e9dients conformes aux standards du commerce \u00e9quitable Fairtrade/Max Havelaar", "ingr\u00e9dients conformes aux standards du commerce \u00e9quitable", "100% du total des ingr\u00e9dients d'origine agricole sont conformes aux standards du commerce \u00e9quitable"], "hu": ["M\u00e9lt\u00e1nyos kereskedelem", "Fair trade", "becs\u00fcletes kereskedelem"], "en": ["Fair trade"]}, "incompatible_with": {"en": "labels:en:non-fair-trade"}, "name": {"he": "\u05e1\u05d7\u05e8 \u05d4\u05d5\u05d2\u05df", "it": "Commercio equo", "cs": "Fair trade", "xx": "Fair trade", "hu": "M\u00e9lt\u00e1nyos kereskedelem", "en": "Fair trade", "fr": "Commerce \u00e9quitable", "nl": "Fairtrade", "ca": "Comer\u00e7 Just", "es": "Comercio justo", "pt": "Com\u00e9rcio justo", "pl": "Fair trade", "fi": "Reilu kauppa", "bg": "\u0421\u043f\u0440\u0430\u0432\u0435\u0434\u043b\u0438\u0432\u0430 
\u0442\u044a\u0440\u0433\u043e\u0432\u0438\u044f", "de": "Fairer Handel"}, "label_categories": {"en": "en:Social responsibility"}, "children": ["en:fair-for-life", "en:fair-trade-organic", "en:fairtrade-cocoa", "en:fairtrade-international", "en:fairtrade-usa", "en:small-producers-symbol", "en:transfer", "fr:agri-ethique-france", "fr:bio-equitable", "fr:bio-equitable-en-france", "fr:biopartenaire"], "wikidata": {"en": "Q188485"}}, "en:nutriscore-grade-a": {"parents": ["en:nutriscore"], "label_categories": {"en": "en:Nutrition Grades, en:Health"}, "synonyms": {"bg": ["Nutriscore A"], "de": ["Nutriscore Note A"], "fi": ["Nutriscore-arvosana A"], "pt": ["Nutri-Score grau A"], "da": ["Nutriscore A"], "en": ["Nutriscore Grade A"], "fr": ["Nutriscore A"], "nl": ["Nutriscore A"], "es": ["Nutriscore A"], "ca": ["Grau A Nutriscore"], "he": ["\u05d3\u05d9\u05e8\u05d5\u05d2 A \u05d1\u05beNutriscore"], "it": ["Nutriscore A"], "hr": ["Nutriscore oznaka A"], "xx": ["Nutriscore A"]}, "name": {"da": "Nutriscore A", "pt": "Nutri-Score grau A", "fi": "Nutriscore-arvosana A", "bg": "Nutriscore A", "de": "Nutriscore Note A", "it": "Nutriscore A", "he": "\u05d3\u05d9\u05e8\u05d5\u05d2 A \u05d1\u05beNutriscore", "hr": "Nutriscore oznaka A", "xx": "Nutriscore A", "fr": "Nutriscore A", "en": "Nutriscore Grade A", "ca": "Grau A Nutriscore", "es": "Nutriscore A", "nl": "Nutriscore A"}}, "en:nutriscore-grade-d": {"name": {"de": "Nutriscore D", "bg": "Nutriscore D", "da": "Nutriscore D", "fi": "Nutriscore-arvosana D", "pt": "Nutri-Score grau D", "es": "Nutriscore D", "ca": "Grau D NutriScore", "nl": "Nutriscore D", "fr": "Nutriscore D", "en": "Nutriscore Grade D", "hr": "Nutriscore oznaka D", "xx": "Nutriscore D", "it": "Nutriscore D", "he": "\u05d3\u05d9\u05e8\u05d5\u05d2 D \u05d1\u05beNutriscore"}, "synonyms": {"de": ["Nutriscore D"], "bg": ["Nutriscore D"], "pt": ["Nutri-Score grau D"], "fi": ["Nutriscore-arvosana D"], "da": ["Nutriscore D"], "nl": ["Nutriscore D"], "ca": ["Grau D 
NutriScore"], "es": ["Nutriscore D"], "en": ["Nutriscore Grade D"], "fr": ["Nutriscore D"], "xx": ["Nutriscore D"], "hr": ["Nutriscore oznaka D"], "he": ["\u05d3\u05d9\u05e8\u05d5\u05d2 D \u05d1\u05beNutriscore"], "it": ["Nutriscore D"]}, "label_categories": {"en": "en:Nutrition Grades, en:Health"}, "parents": ["en:nutriscore"]}, "en:fr-bio-01": {"country": {"en": "france"}, "parents": ["en:eu-organic"], "name": {"xx": "FR-BIO-01", "pt": "FR-BIO-01", "it": "FR-BIO-01", "nl": "FR-BIO-01", "ca": "FR-BIO-01", "es": "FR-BIO-01", "en": "FR-BIO-01", "fr": "FR-BIO-01"}, "synonyms": {"es": ["FR-BIO-01"], "ca": ["FR-BIO-01"], "nl": ["FR-BIO-01"], "fr": ["FR-BIO-01", "Ecocert-FR-BIO-01"], "en": ["FR-BIO-01"], "xx": ["FR-BIO-01"], "it": ["FR-BIO-01"], "pt": ["FR-BIO-01"]}}, "fr:ab-agriculture-biologique": {"wikidata": {"en": "Q396709"}, "parents": ["en:eu-organic"], "country": {"en": "france"}, "name": {"xx": "AB Agriculture Biologique", "fr": "AB Agriculture Biologique"}, "synonyms": {"fr": ["AB Agriculture Biologique", "AB", "Agence Bio", "ab bio", "bio ab", "Ab-certifie-agriculture-biologique", "ab certifi\u00e9", "AB France"], "xx": ["AB Agriculture Biologique"]}}, "en:eu-non-eu-agriculture": {"name": {"bg": "\u0417\u0435\u043c\u0435\u0434\u0435\u043b\u0438\u0435 \u043e\u0442/\u0438\u0437\u0432\u044a\u043d \u0415\u0421", "de": "EU-/Nicht-EU-Landwirtschaft", "pt": "Agricultura EU/n\u00e3o EU", "fi": "Tuotettu EU:ssa ja EU:n ulkopuolella", "en": "EU/non-EU Agriculture", "hu": "EU/nem-EU mez\u0151gazdas\u00e1g", "fr": "Agriculture UE/Non UE", "nl": "EU/Non EU landbouw", "ca": "Procedent d'agricultura UE/no UE", "es": "Agricultura UE/no UE", "lt": "ES / ne ES \u017eem\u0117s \u016bkis", "it": "Agricoltura EU/non-EU", "cs": "Zem\u011bd\u011blstv\u00ed EU a mimo EU", "ru": "EU/\u043d\u0435-EU \u0430\u0433\u0440\u043e\u043a\u0443\u043b\u044c\u0442\u0443\u0440\u0430"}, "synonyms": {"en": ["EU/non-EU Agriculture", "EU and non EU Agriculture"], "hu": ["EU/nem-EU 
mez\u0151gazdas\u00e1g"], "fr": ["Agriculture UE/Non UE"], "nl": ["EU/Non EU landbouw"], "es": ["Agricultura UE/no UE"], "ca": ["Procedent d'agricultura UE/no UE", "Agricultura UE/no UE"], "it": ["Agricoltura EU/non-EU"], "lt": ["ES / ne ES \u017eem\u0117s \u016bkis", "ES ir ne ES \u017eem\u0117s \u016bkis"], "cs": ["Zem\u011bd\u011blstv\u00ed EU a mimo EU"], "ru": ["EU/\u043d\u0435-EU \u0430\u0433\u0440\u043e\u043a\u0443\u043b\u044c\u0442\u0443\u0440\u0430"], "bg": ["\u0417\u0435\u043c\u0435\u0434\u0435\u043b\u0438\u0435 \u043e\u0442/\u0438\u0437\u0432\u044a\u043d \u0415\u0421"], "de": ["EU-/Nicht-EU-Landwirtschaft"], "fi": ["Tuotettu EU:ssa ja EU:n ulkopuolella"], "pt": ["Agricultura EU/n\u00e3o EU", "Agricultura da Uni\u00e3o Europeia EU/n\u00e3o Uni\u00e3o Europeia"]}, "parents": ["en:eu-agriculture", "en:non-eu-agriculture"]}, "en:no-preservatives": {"children": ["en:no-dyes-or-preservatives", "en:no-trans-fat-and-preservatives"], "name": {"bg": "\u0411\u0435\u0437 \u043a\u043e\u043d\u0441\u0435\u0440\u0432\u0430\u043d\u0442\u0438", "da": "Uden konserveringsmiddel", "pt": "Sem conservantes", "pl": "Bez konserwant\u00f3w", "ca": "Sense conservants", "es": "Sin conservantes", "fr": "Sans conservateurs", "en": "No preservatives", "hr": "Bez konzervansa", "it": "Senza conservanti", "lt": "Be konservant\u0173", "ro": "F\u0103r\u0103 conservan\u021bi", "he": "\u05dc\u05dc\u05d0 \u05de\u05e9\u05de\u05e8\u05d9\u05dd", "tr": "Koruyucu yok", "de": "Ohne Konservierungsstoffe", "th": "\u0e44\u0e21\u0e48\u0e43\u0e0a\u0e49\u0e27\u0e31\u0e15\u0e16\u0e38\u0e01\u0e31\u0e19\u0e40\u0e2a\u0e35\u0e22", "fi": "S\u00e4il\u00f6nt\u00e4aineeton", "nl": "Conserveermiddelvrij", "zh": "\u65e0\u9632\u8150\u5242", "hu": "Tart\u00f3s\u00edt\u00f3szermentes", "sv": "Utan konserveringsmedel", "cs": "Bez konzervant\u016f", "ru": "\u0411\u0435\u0437 \u043a\u043e\u043d\u0441\u0435\u0440\u0432\u0430\u043d\u0442\u043e\u0432"}, "synonyms": {"de": ["Ohne Konservierungsstoffe", "Ohne Zusatz von 
Konservierungsstoffen"], "th": ["\u0e44\u0e21\u0e48\u0e43\u0e0a\u0e49\u0e27\u0e31\u0e15\u0e16\u0e38\u0e01\u0e31\u0e19\u0e40\u0e2a\u0e35\u0e22"], "fi": ["s\u00e4il\u00f6nt\u00e4aineeton", "ei s\u00e4il\u00f6nt\u00e4aineita", "ilman s\u00e4il\u00f6nt\u00e4aineita", "ei lis\u00e4ttyj\u00e4 s\u00e4il\u00f6nt\u00e4aineita", "ei sis\u00e4ll\u00e4 s\u00e4il\u00f6nt\u00e4aineita"], "nl": ["Conserveermiddelvrij"], "hu": ["Tart\u00f3s\u00edt\u00f3szermentes", "hozz\u00e1adott tart\u00f3s\u00edt\u00f3szer n\u00e9lk\u00fcl", "tart\u00f3s\u00edt\u00f3szert nem tartalmaz"], "zh": ["\u65e0\u9632\u8150\u5242"], "ru": ["\u0411\u0435\u0437 \u043a\u043e\u043d\u0441\u0435\u0440\u0432\u0430\u043d\u0442\u043e\u0432"], "cs": ["Bez konzervant\u016f"], "sv": ["Utan konserveringsmedel"], "bg": ["\u0411\u0435\u0437 \u043a\u043e\u043d\u0441\u0435\u0440\u0432\u0430\u043d\u0442\u0438"], "pl": ["Bez konserwant\u00f3w", "bez dodatku konserwant\u00f3w", "bez substancji konserwuj\u0105cych", "bez dodatku substancji konserwuj\u0105cych"], "pt": ["Sem conservantes", "sem conservantes adicionados", "sem conservante", "sem conservante adicionado"], "da": ["Uden konserveringsmiddel"], "es": ["Sin conservantes", "Sin conservantes a\u00f1adidos", "Sin conservadores", "Sin conservadores a\u00f1adidos"], "ca": ["Sense conservants", "sense conservants afegits", "lliure de conservants artificials"], "en": ["No preservatives", "no added preservatives", "preservatives free"], "fr": ["Sans conservateurs", "sans conservateur ajout\u00e9", "sans ajout de conservateur"], "hr": ["bez konzervansa"], "he": ["\u05dc\u05dc\u05d0 \u05de\u05e9\u05de\u05e8\u05d9\u05dd", "\u05dc\u05dc\u05d0 \u05d7\u05d5\u05de\u05e8\u05d9\u05dd \u05de\u05e9\u05de\u05e8\u05d9\u05dd", "\u05dc\u05dc\u05d0 \u05d7\u05d5\u05de\u05e8\u05d9\u05dd \u05de\u05e9\u05de\u05e8\u05d9\u05dd \u05de\u05dc\u05d0\u05db\u05d5\u05ea\u05d9\u05d9\u05dd"], "tr": ["Koruyucu yok", "Koruyucu madde i\u00e7ermez"], "ro": ["F\u0103r\u0103 conservan\u021bi"], "lt": ["Be 
konservant\u0173", "be prid\u0117t\u0173 konservant\u0173"], "it": ["Senza conservanti", "Senza aggiunta di conservanti", "Senza conservanti aggiunti"]}}, "en:transformed-in-france": {"manufacturing_places": {"en": "en:france"}, "synonyms": {"ca": ["Transformat a Fran\u00e7a"], "de": ["Verarbeitet in Frankreich"], "es": ["Transformado en Francia"], "pt": ["Transformado em Fran\u00e7a", "transformad na Fran\u00e7a"], "fi": ["Prosessoitu Ranskassa"], "en": ["Transformed in France"], "it": ["Lavorato in Francia"], "fr": ["Transform\u00e9 en France"]}, "name": {"es": "Transformado en Francia", "de": "Verarbeitet in Frankreich", "ca": "Transformat a Fran\u00e7a", "fi": "Prosessoitu Ranskassa", "en": "Transformed in France", "pt": "Transformado em Fran\u00e7a", "fr": "Transform\u00e9 en France", "it": "Lavorato in Francia"}, "label_categories": {"en": "en:Geographic label"}}, "en:no-lactose": {"incompatible_with": {"en": "labels:en:contains-lactose"}, "name": {"ca": "Sense lactosa", "es": "Sin lactosa", "en": "No lactose", "fr": "Sans lactose", "sk": "Bez laktozy", "hr": "Bez laktoze", "he": "\u05e0\u05d8\u05d5\u05dc \u05dc\u05e7\u05d8\u05d5\u05d6", "tr": "Laktozsuz", "it": "Senza lattosio", "bg": "\u0411\u0435\u0437 \u043b\u0430\u043a\u0442\u043e\u0437\u0430", "pt": "Sem lactose", "pl": "Bez laktozy", "da": "Laktosefri", "cz": "Bez laktozy", "nl": "Lactosevrij", "hu": "Lakt\u00f3zmentes", "ru": "\u0411\u0435\u0437 \u043b\u0430\u043a\u0442\u043e\u0437\u044b", "cs": "Bez lakt\u00f3zy", "sv": "Laktosfri", "nb": "Laktosefri", "de": "Laktosefrei", "th": "\u0e44\u0e21\u0e48\u0e21\u0e35\u0e41\u0e25\u0e04\u0e42\u0e15\u0e2a", "fi": "Laktoositon"}, "opposite": {"en": "en:contains-lactose"}, "synonyms": {"fr": ["sans lactose"], "en": ["No lactose", "lactose-free", "Lactose free", "without lactose"], "ca": ["Sense lactosa", "lliure de lactosa"], "es": ["Sin lactosa"], "it": ["Senza lattosio", "Naturalmente senza lattosio"], "he": ["\u05e0\u05d8\u05d5\u05dc 
\u05dc\u05e7\u05d8\u05d5\u05d6"], "tr": ["Laktozsuz"], "sk": ["Bez laktozy"], "hr": ["bez laktoze"], "bg": ["\u0411\u0435\u0437 \u043b\u0430\u043a\u0442\u043e\u0437\u0430"], "da": ["Laktosefri"], "pt": ["Sem lactose", "0% lactose", "livre de lactose"], "pl": ["Bez laktozy"], "hu": ["Lakt\u00f3zmentes", "lakt\u00f3z mentes"], "cz": ["Bez laktozy"], "nl": ["Lactosevrij", "Vrij van lactose"], "sv": ["Laktosfri", "Laktosfritt"], "cs": ["Bez lakt\u00f3zy"], "ru": ["\u0411\u0435\u0437 \u043b\u0430\u043a\u0442\u043e\u0437\u044b"], "th": ["\u0e44\u0e21\u0e48\u0e21\u0e35\u0e41\u0e25\u0e04\u0e42\u0e15\u0e2a"], "de": ["Laktosefrei", "Lactosefrei"], "nb": ["Laktosefri", "Uten laktose", "Naturlig laktosefri"], "fi": ["laktoositon", "ei laktoosia", "ilman laktoosia"]}}, "en:max-havelaar": {"parents": ["en:fairtrade-international"], "synonyms": {"ca": ["Max Havelaar", "Comer\u00e7 Just/Max Havelaar"], "xx": ["Max Havelaar"], "en": ["Max Havelaar", "Max Havelar", "Fairtrade/Max Havelaar"], "it": ["Max Havelaar"]}, "name": {"en": "Max Havelaar", "it": "Max Havelaar", "ca": "Max Havelaar", "xx": "Max Havelaar"}, "wikidata": {"en": "Q694008"}, "children": ["en:max-havelaar-belgium", "en:max-havelaar-france"]}, "en:organic": {"name": {"bg": "\u0411\u0438\u043e", "th": "\u0e2d\u0e2d\u0e23\u0e4c\u0e41\u0e01\u0e19\u0e34\u0e04", "de": "Bio", "fi": "Luomu", "pl": "Ekologiczny", "pt": "Org\u00e2nico", "da": "\u00d8kologisk", "ko": "\uc720\uae30\ub18d", "hu": "Bio", "en": "Organic", "zh": "\u6709\u673a", "fr": "Bio", "nl": "Biologisch", "es": "Ecol\u00f3gico", "ca": "Org\u00e0nic", "he": "\u05d0\u05d5\u05e8\u05d2\u05e0\u05d9", "tr": "Organik", "lt": "Ekologi\u0161kas", "it": "Biologico", "cs": "Bio", "hr": "Ekolo\u0161ki uzgoj", "sv": "Ekologisk"}, "label_categories": {"en": "en:Environnement"}, "environmental_benefits": {"es": "La agricultura org\u00e1nica contribuye a preservar la biodiversidad, el clima, la calidad del agua y la fertilidad del suelo.", "en": "Organic agriculture 
contributes to preserve biodiversity, climate, water quality and soil fertility.", "fr": "L'agriculture biologique contribue \u00e0 pr\u00e9server la biodiversit\u00e9, le climat, la qualit\u00e9 de l'eau et la fertilit\u00e9 des sols.", "sk": "Ekologick\u00e9 po\u013enohospod\u00e1rstvo prispieva k zachovaniu biodiverzity, kl\u00edmy, kvality vody a p\u00f4dnej \u00farodnosti.", "hr": "Organska poljoprivreda doprinosi o\u010duvanju biolo\u0161ke raznolikosti, klime, kvalitete vode i plodnosti tla.", "tr": "Organik tar\u0131m, biyolojik \u00e7e\u015fitlilik, iklim, su kalitesi ve toprak verimlili\u011fini korumaya katk\u0131da bulunur.", "it": "L'agricoltura biologica contribuisce a preservare la biodiversit\u00e0, il clima, la qualit\u00e0 dell'acqua e la fertilit\u00e0 del suolo.", "ro": "Agricultura organic\u0103 contribuie la conservarea biodiversit\u0103\u021bii, a climei, a calit\u0103\u021bii apei \u0219i a fertilit\u0103\u021bii solului.", "el": "\u0397 \u03b2\u03b9\u03bf\u03bb\u03bf\u03b3\u03b9\u03ba\u03ae \u03b3\u03b5\u03c9\u03c1\u03b3\u03af\u03b1 \u03c3\u03c5\u03bc\u03b2\u03ac\u03bb\u03bb\u03b5\u03b9 \u03c3\u03c4\u03b7 \u03b4\u03b9\u03b1\u03c4\u03ae\u03c1\u03b7\u03c3\u03b7 \u03c4\u03b7\u03c2 \u03b2\u03b9\u03bf\u03c0\u03bf\u03b9\u03ba\u03b9\u03bb\u03cc\u03c4\u03b7\u03c4\u03b1\u03c2, \u03c4\u03bf\u03c5 \u03ba\u03bb\u03af\u03bc\u03b1\u03c4\u03bf\u03c2, \u03c4\u03b7\u03c2 \u03c0\u03bf\u03b9\u03cc\u03c4\u03b7\u03c4\u03b1\u03c2 \u03c4\u03bf\u03c5 \u03bd\u03b5\u03c1\u03bf\u03cd \u03ba\u03b1\u03b9 \u03c4\u03b7\u03c2 \u03b3\u03bf\u03bd\u03b9\u03bc\u03cc\u03c4\u03b7\u03c4\u03b1\u03c2 \u03c4\u03bf\u03c5 \u03b5\u03b4\u03ac\u03c6\u03bf\u03c5\u03c2.", "ko": "\uc720\uae30\ub18d\uc740 \uc0dd\ubb3c \ub2e4\uc591\uc131, \uae30\ud6c4, \uc218\uc9c8 \ubc0f \ud1a0\uc591\uc758 \ube44\uc625\uc131 \ubcf4\uc874\uc5d0 \uae30\uc5ec\ud569\ub2c8\ub2e4.", "pt": "A agricultura org\u00e2nica contribui para preservar a biodiversidade, o clima, a qualidade da \u00e1gua e a fertilidade do 
solo.", "pl": "Rolnictwo ekologiczne przyczynia si\u0119 do ochrony bior\u00f3\u017cnorodno\u015bci, klimatu, jako\u015bci wody i \u017cyzno\u015bci gleby.", "lv": "Biolo\u0123isk\u0101 lauksaimniec\u012bba veicina biolo\u0123isk\u0101s daudzveid\u012bbas, klimata, \u016bdens kvalit\u0101tes un augsnes augl\u012bbas saglab\u0101\u0161anu.", "da": "\u00d8kologisk landbrug bidrager til at bevare biodiversitet, klima, vandkvalitet og jordfrugtbarhed.", "nl": "Biologische landbouw draagt bij aan het behoud van biodiversiteit, klimaat, waterkwaliteit en bodemvruchtbaarheid.", "zh": "\u6709\u673a\u519c\u4e1a\u6709\u52a9\u4e8e\u4fdd\u62a4\u751f\u7269\u591a\u6837\u6027\u3001\u6c14\u5019\u3001\u6c34\u8d28\u548c\u571f\u58e4\u80a5\u529b\u3002", "hu": "A bio mez\u0151gazdas\u00e1g hozz\u00e1j\u00e1rul a biodiverzit\u00e1s, az \u00e9ghajlat, a v\u00edzmin\u0151s\u00e9g \u00e9s a talaj term\u00e9kenys\u00e9g\u00e9nek meg\u0151rz\u00e9s\u00e9hez.", "ru": "\u041e\u0440\u0433\u0430\u043d\u0438\u0447\u0435\u0441\u043a\u043e\u0435 \u0437\u0435\u043c\u043b\u0435\u0434\u0435\u043b\u0438\u0435 \u0441\u043f\u043e\u0441\u043e\u0431\u0441\u0442\u0432\u0443\u0435\u0442 \u0441\u043e\u0445\u0440\u0430\u043d\u0435\u043d\u0438\u044e \u0431\u0438\u043e\u0440\u0430\u0437\u043d\u043e\u043e\u0431\u0440\u0430\u0437\u0438\u044f, \u043a\u043b\u0438\u043c\u0430\u0442\u0430, \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0430 \u0432\u043e\u0434\u044b \u0438 \u043f\u043b\u043e\u0434\u043e\u0440\u043e\u0434\u0438\u044f \u043f\u043e\u0447\u0432\u044b.", "cs": "Ekologick\u00e9 zem\u011bd\u011blstv\u00ed p\u0159isp\u00edv\u00e1 k ochran\u011b biodiverzity, klimatu, kvality vody a \u00farodnosti p\u016fdy.", "sv": "Ekologiskt jordbruk bidrar till att bevara biologisk m\u00e5ngfald, klimat, vattenkvalitet och markfruktbarhet.", "ar": "\u0627\u0644\u0632\u0631\u0627\u0639\u0629 \u0627\u0644\u0639\u0636\u0648\u064a\u0629 \u062a\u0633\u0627\u0647\u0645 \u0641\u064a \u0627\u0644\u062d\u0641\u0627\u0638 
\u0639\u0644\u0649 \u0627\u0644\u062a\u0646\u0648\u0639 \u0627\u0644\u0628\u064a\u0648\u0644\u0648\u062c\u064a \u0648\u0627\u0644\u0645\u0646\u0627\u062e \u0648\u062c\u0648\u062f\u0629 \u0627\u0644\u0645\u064a\u0627\u0647 \u0648\u062e\u0635\u0648\u0628\u0629 \u0627\u0644\u062a\u0631\u0628\u0629.", "de": "Die \u00f6kologische Landwirtschaft tr\u00e4gt zur Erhaltung der Artenvielfalt, des Klimas, der Wasserqualit\u00e4t und der Bodenfruchtbarkeit bei.", "ja": "\u6709\u6a5f\u8fb2\u696d\u306f\u751f\u7269\u591a\u69d8\u6027\u3001\u6c17\u5019\u3001\u6c34\u8cea\u3001\u571f\u58cc\u306e\u80a5\u6c83\u3055\u306e\u4fdd\u8b77\u306b\u8ca2\u732e\u3057\u307e\u3059\u3002", "sl": "Ekolo\u0161ko kmetijstvo prispeva k ohranjanju biotske raznovrstnosti, podnebja, kakovosti vode in rodovitnosti tal.", "fi": "Luomuviljely edist\u00e4\u00e4 biodiversiteetin, ilmaston, veden laadun ja maaper\u00e4n hedelm\u00e4llisyyden s\u00e4ilymist\u00e4."}, "children": ["de:bio-7-initiative", "en:austria-bio-garantie", "en:bio-austria", "en:bio-suisse", "en:biodynamic-agriculture", "en:biogarantie", "en:biokreis", "en:bioland", "en:canada-organic", "en:catalan-council-of-organic-production", "en:danish-state-controlled-organic", "en:debio-organic", "en:ecoveg", "en:eu-organic", "en:fair-trade-organic", "en:farm-verified-organic", "en:finnish-organic-association", "en:india-organic", "en:luomu-controlled-organic-production", "en:migros-bio", "en:naturaplan", "en:naturland", "en:soil-association-organic", "en:tun-certified-organic", "en:usda-organic", "es:comite-aragones-de-agricultura-ecologica", "es:comite-de-agricultura-ecologica", "es:comite-de-agricultura-ecologica-de-la-comunidad-de-madrid", "es:comite-de-agricultura-ecologica-de-la-comunitat-valenciana", "es:consejo-de-agricultura-ecologica-de-la-region-de-murcia", "es:organico-argentina", "fr:bio-coherence", "fr:bio-equitable", "fr:bio-equitable-en-france", "fr:bio-solidaire", "fr:biopartenaire", "fr:nature-et-progres", "it:icea-bio-vegan", 
"it:icea-bio-vegetariano", "it:icea-biologico", "pt:produto-organico-brasil", "sv:krav"], "wikidata": {"en": "Q380778"}, "opposite": {"en": "en:non-organic"}, "synonyms": {"nl": ["Biologisch", "biologische", "van biologische oorsprong", "van biologische teelt", "van gecontroleerd biologische teelt", "afkomstig van biologische productiemethode", "van gecontroleerde biologische landbouw"], "es": ["Ecol\u00f3gico", "ecol\u00f3gica", "Producto ecol\u00f3gico", "Biol\u00f3gico", "Producto biol\u00f3gico", "Org\u00e1nico", "Producto org\u00e1nico", "Eco", "Bio", "Ingredientes ecol\u00f3gicos", "Ingrediente ecol\u00f3gico", "ingredientes procedentes de la agricultura ecol\u00f3gica", "ingrediente procedente de la agricultura ecol\u00f3gica", "agricultura ecol\u00f3gica", "cumple con el reglamento de agricultura ecol\u00f3gica CE 2092/91", "procedentes de la agricultura ecol\u00f3gica", "procedente de la agricultura ecol\u00f3gica", "de agricultura ecol\u00f3gica", "de agricultura ecologica", "Procedente e agricultura ecol\u00f3gica", "de cultivo ecol\u00f3gico"], "ca": ["Org\u00e0nic", "de cultiu ecologic"], "hu": ["Bio", "\u00d6ko", "Organikus", "biogazd\u00e1lkod\u00e1sb\u00f3l", "ellen\u0151rz\u00f6tt \u00f6kol\u00f3giai gazd\u00e1lkod\u00e1sb\u00f3l"], "en": ["Organic", "organically grown", "organically produced", "ingredient produced organically", "from organic farming", "From Organic Agriculture", "organic ingredients"], "zh": ["\u6709\u673a"], "fr": ["Bio", "agriculture biologique", "biologique", "organic", "organique", "issu de l'agriculture biologique", "issus de l'agriculture biologique", "issue de l'agriculture biologique", "issues de l'agriculture biologique", "ingr\u00e9dient issu de l'agriculture biologique", "ingr\u00e9dients issus de l'agriculture biologique", "Les-ingredients-sont-issus-d-une-agriculture-biologique", "ingr\u00e9dients agricoles issus de l'agriculture biologique", "produits issus de l'agriculture biologique", "produit issu de l'agriculture 
biologique", "ingr\u00e9dient agricole issu de l'agriculture biologique", "tous les ingr\u00e9dients agricoles sont issus de l'agriculture biologique", "les ingr\u00e9dients agricoles sont issus de l'agriculture biologique", "tous les ingr\u00e9dients sont issus de l'agriculture biologique", "ingr\u00e9dients bio", "ingr\u00e9dients biologiques", "ingr\u00e9dient bio", "ingr\u00e9dient biologique", "issue de l'agriculture biologique controlee"], "cs": ["Bio"], "hr": ["ekolo\u0161ki uzgoj", "iz ekolo\u0161kog uzgoja", "iz kontroliranog ekolo\u0161kog uzgoja", "iz kontrolirane ekolo\u0161ke proizvodnje", "iz kontroliranog biolo\u0161kog uzgoja", "iz kontroliranog organskog uzgoja"], "sv": ["Ekologisk", "Ekologiskt", "Ekologiska"], "he": ["\u05d0\u05d5\u05e8\u05d2\u05e0\u05d9"], "tr": ["Organik"], "it": ["Biologico", "biologici", "da agricoltura biologica"], "lt": ["Ekologi\u0161kas", "ekologi\u0161kai u\u017eaugintas", "ekologi\u0161kai pagamintas", "ingredientas pagamintas ekologi\u0161kai", "i\u0161 ekologi\u0161ko \u017eem\u0117s \u016bkio", "i\u0161 ekologi\u0161ko \u017eem\u0117s \u016bkio"], "de": ["Bio", "\u00d6ko", "biologisch", "biologische", "biologischer", "biologisches", "\u00d6kologisch", "Kontrolliert biologisch", "Kontrolliert \u00f6kologisch", "biologische Landwirtschaft", "aus biologischer Landwirtschaft", "\u00f6kologische Landwirtschaft", "biologischer Landbau", "\u00f6kologischer Landbau", "aus kontrolliert biologischem Anbau", "biologisch gewonnen", "aus kontrolliert biologischer landwirtschaft", "aus kontrolliert \u00f6kologischer erzeugung", "aus kontrolliert \u00f6kologischer landwirtschaft", "aus kontrolliert \u00f6kologischem anbau", "aus kontrolliert \u00f6kologischem landbau", "aus kontrollierter biologischer landwirtschaft", "aus kontrollierter \u00f6kologischer landwirtschaft", "kontrolliert biologischer anbau"], "th": ["\u0e2d\u0e2d\u0e23\u0e4c\u0e41\u0e01\u0e19\u0e34\u0e04"], "bg": ["\u0411\u0438\u043e", 
"\u0431\u0438\u043e\u043b\u043e\u0433\u0438\u0447\u043d\u043e \u0437\u0435\u043c\u0435\u0434\u0435\u043b\u0438\u0435", "\u0431\u0438\u043e\u043b\u043e\u0433\u0438\u0447\u043d\u043e"], "ko": ["\uc720\uae30\ub18d"], "pl": ["ekologiczny", "bio", "ekologiczne", "ekologiczna", "ekologicznych", "ekologicznego", "z kontrolowanych upraw ekologicznych", "z kontrolowanej produkcji ekologicznej", "z kontrolowanego rolnictwa ekologicznego", "certyfikowany sk\u0142adnik ekologiczny", "sk\u0142adnik ekologiczny", "produkt pochodz\u0105cy z kontrolowanych certyfikowanych upraw ekologicznych"], "fi": ["Luomu", "luomutuotanto", "luomutuotantoa", "luomuraaka-aine", "luonnonmukainen tuotanto", "luonnonmukaisesti"], "pt": ["Org\u00e2nico", "produ\u00e7\u00e3o org\u00e2nica", "produzido de forma org\u00e2nica", "numa quinta org\u00e2nica", "de agricultura org\u00e2nica", "agricultura org\u00e2nica"], "da": ["\u00d8kologisk", "\u00d8kologiske"]}, "description": {"sl": "Ekolo\u0161ka hrana je hrana, ki je pridelana z metodami, ki se skladajo s standardi ekolo\u0161kega kmetovanja in vklju\u010dujejo prakse, ki kro\u017eijo vire, spodbujajo ekolo\u0161ko ravnovesje in ohranjajo biotsko raznovrstnost.", "ja": "\u6709\u6a5f\u98df\u54c1\u306f\u3001\u6709\u6a5f\u8fb2\u696d\u306e\u57fa\u6e96\u306b\u6e96\u62e0\u3057\u305f\u65b9\u6cd5\u3067\u751f\u7523\u3055\u308c\u3001\u8cc7\u6e90\u306e\u5faa\u74b0\u3001\u751f\u614b\u30d0\u30e9\u30f3\u30b9\u306e\u4fc3\u9032\u3001\u751f\u7269\u591a\u69d8\u6027\u306e\u4fdd\u8b77\u3092\u7279\u5fb4\u3068\u3057\u3066\u3044\u307e\u3059\u3002", "fi": "Luomuruoka on ruokaa, joka on tuotettu noudattaen luomuviljelyn standardeja ja joka sis\u00e4lt\u00e4\u00e4 k\u00e4yt\u00e4nt\u00f6j\u00e4, jotka kierr\u00e4tt\u00e4v\u00e4t resursseja, edist\u00e4v\u00e4t ekologista tasapainoa ja s\u00e4ilytt\u00e4v\u00e4t biodiversiteetti\u00e4.", "de": "Bio-Lebensmittel m\u00fcssen aus \u00f6kologisch kontrolliertem Anbau stammen, d\u00fcrfen nicht gentechnisch ver\u00e4ndert sein und 
werden ohne Einsatz von chemisch-synthetischen Pflanzenschutzmitteln, Kunstd\u00fcnger oder Kl\u00e4rschlamm angebaut.", "ar": "\u0627\u0644\u0637\u0639\u0627\u0645 \u0627\u0644\u0639\u0636\u0648\u064a \u0647\u0648 \u0627\u0644\u0637\u0639\u0627\u0645 \u0627\u0644\u0630\u064a \u064a\u062a\u0645 \u0625\u0646\u062a\u0627\u062c\u0647 \u0628\u0648\u0627\u0633\u0637\u0629 \u0637\u0631\u0642 \u062a\u062a\u0648\u0627\u0641\u0642 \u0645\u0639 \u0645\u0639\u0627\u064a\u064a\u0631 \u0627\u0644\u0632\u0631\u0627\u0639\u0629 \u0627\u0644\u0639\u0636\u0648\u064a\u0629 \u0648\u062a\u062a\u0645\u064a\u0632 \u0628\u0645\u0645\u0627\u0631\u0633\u0627\u062a \u062a\u0639\u064a\u062f \u062a\u062f\u0648\u064a\u0631 \u0627\u0644\u0645\u0648\u0627\u0631\u062f \u0648\u062a\u0639\u0632\u0632 \u0627\u0644\u062a\u0648\u0627\u0632\u0646 \u0627\u0644\u0628\u064a\u0626\u064a \u0648\u062a\u062d\u0627\u0641\u0638 \u0639\u0644\u0649 \u0627\u0644\u062a\u0646\u0648\u0639 \u0627\u0644\u0628\u064a\u0648\u0644\u0648\u062c\u064a.", "sv": "Ekologisk mat \u00e4r mat som produceras enligt ekologiskt jordbruk och har metoder som fr\u00e4mjar resurshantering, ekologisk balans och bevarande av biologisk m\u00e5ngfald.", "cs": "Organick\u00e9 potraviny jsou potraviny produkovan\u00e9 metodami, kter\u00e9 spl\u0148uj\u00ed standardy ekologick\u00e9ho zem\u011bd\u011blstv\u00ed a zahrnuj\u00ed postupy, kter\u00e9 cykluj\u00ed zdroje, podporuj\u00ed ekologickou rovnov\u00e1hu a zachov\u00e1vaj\u00ed biodiverzitu.", "ru": "\u041e\u0440\u0433\u0430\u043d\u0438\u0447\u0435\u0441\u043a\u0430\u044f \u043f\u0438\u0449\u0430 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438\u0442\u0441\u044f \u0441 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u0435\u043c \u043c\u0435\u0442\u043e\u0434\u043e\u0432, \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0443\u044e\u0449\u0438\u0445 \u0441\u0442\u0430\u043d\u0434\u0430\u0440\u0442\u0430\u043c 
\u043e\u0440\u0433\u0430\u043d\u0438\u0447\u0435\u0441\u043a\u043e\u0433\u043e \u0437\u0435\u043c\u043b\u0435\u0434\u0435\u043b\u0438\u044f, \u0438 \u0432\u043a\u043b\u044e\u0447\u0430\u0435\u0442 \u043f\u0440\u0430\u043a\u0442\u0438\u043a\u0438, \u0441\u043f\u043e\u0441\u043e\u0431\u0441\u0442\u0432\u0443\u044e\u0449\u0438\u0435 \u0446\u0438\u043a\u043b\u0438\u0447\u0435\u0441\u043a\u043e\u043c\u0443 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044e \u0440\u0435\u0441\u0443\u0440\u0441\u043e\u0432, \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u0430\u043d\u0438\u044e \u044d\u043a\u043e\u043b\u043e\u0433\u0438\u0447\u0435\u0441\u043a\u043e\u0433\u043e \u0431\u0430\u043b\u0430\u043d\u0441\u0430 \u0438 \u0441\u043e\u0445\u0440\u0430\u043d\u0435\u043d\u0438\u044e \u0431\u0438\u043e\u0440\u0430\u0437\u043d\u043e\u043e\u0431\u0440\u0430\u0437\u0438\u044f.", "nl": "Biologische voeding is voedsel dat wordt geproduceerd met methoden die voldoen aan de normen van de biologische landbouw, welke een ecologisch evenwicht en het behoud van biodiversiteit nastreeft.", "hu": "Az organikus \u00e9lelmiszer olyan \u00e9lelmiszer, amelyet az organikus gazd\u00e1lkod\u00e1s szabv\u00e1nyainak megfelel\u0151 m\u00f3dszerekkel \u00e1ll\u00edtanak el\u0151, \u00e9s olyan gyakorlatokat alkalmaz, amelyek el\u0151seg\u00edtik az er\u0151forr\u00e1sok ciklikus haszn\u00e1lat\u00e1t, \u00f6kol\u00f3giai egyens\u00falyt \u00e9s a biol\u00f3giai sokf\u00e9les\u00e9g meg\u0151rz\u00e9s\u00e9t.", "zh": "\u6709\u673a\u98df\u54c1\u662f\u6309\u7167\u6709\u673a\u519c\u4e1a\u6807\u51c6\u751f\u4ea7\u7684\u98df\u54c1\uff0c\u91c7\u7528\u5faa\u73af\u8d44\u6e90\u3001\u4fc3\u8fdb\u751f\u6001\u5e73\u8861\u548c\u4fdd\u62a4\u751f\u7269\u591a\u6837\u6027\u7684\u505a\u6cd5\u3002", "ko": "\uc720\uae30\ub18d \uc2dd\ud488\uc740 \uc720\uae30\ub18d \ubc29\uc2dd\uc744 \uc900\uc218\ud558\uc5ec \uc0dd\uc0b0\ub418\uba70, \uc790\uc6d0 \uc21c\ud658, \uc0dd\ud0dc \uade0\ud615 \uc720\uc9c0, 
\uc0dd\ubb3c \ub2e4\uc591\uc131 \ubcf4\uc874\uc744 \uc7a5\ub824\ud558\ub294 \uc2e4\ucc9c \ubc29\ubc95\uc744 \uac16\ucd94\uace0 \uc788\uc2b5\ub2c8\ub2e4\u3002", "da": "\u00d8kologisk mad er mad, der produceres ved hj\u00e6lp af metoder, der overholder \u00f8kologisk landbrugsstandarder og omfatter praksis, der cykler ressourcer, fremmer \u00f8kologisk balance og bevarer biodiversitet.", "lv": "Biolo\u0123isk\u0101 p\u0101rtika ir p\u0101rtika, kas tiek ra\u017eota, iev\u0113rojot biolo\u0123isk\u0101s lauksaimniec\u012bbas standartus un ietver prakses, kas cirkul\u0113 resursus, veicina ekolo\u0123isko l\u012bdzsvaru un saglab\u0101 biolo\u0123isko daudzveid\u012bbu.", "pl": "\u017bywno\u015b\u0107 organiczna to \u017cywno\u015b\u0107 produkowana zgodnie z zasadami rolnictwa ekologicznego, cechuj\u0105ca si\u0119 praktykami, kt\u00f3re cyklizuj\u0105 zasoby, promuj\u0105 r\u00f3wnowag\u0119 ekologiczn\u0105 i zachowuj\u0105 r\u00f3\u017cnorodno\u015b\u0107 biologiczn\u0105.", "pt": "Os alimentos org\u00e2nicos s\u00e3o produzidos com m\u00e9todos que cumprem normas da agricultura org\u00e2nica, utilizando pr\u00e1ticas que promovem o equil\u00edbrio ambiental e conservam a biodiversidade.", "el": "\u039f\u03c1\u03b3\u03b1\u03bd\u03b9\u03ba\u03ac \u03c4\u03c1\u03cc\u03c6\u03b9\u03bc\u03b1 \u03b5\u03af\u03bd\u03b1\u03b9 \u03c4\u03c1\u03cc\u03c6\u03b9\u03bc\u03b1 \u03c0\u03bf\u03c5 \u03c0\u03b1\u03c1\u03ac\u03b3\u03bf\u03bd\u03c4\u03b1\u03b9 \u03bc\u03b5 \u03bc\u03b5\u03b8\u03cc\u03b4\u03bf\u03c5\u03c2 \u03c0\u03bf\u03c5 \u03c3\u03c5\u03bc\u03bc\u03bf\u03c1\u03c6\u03ce\u03bd\u03bf\u03bd\u03c4\u03b1\u03b9 \u03bc\u03b5 \u03c4\u03b1 \u03c0\u03c1\u03cc\u03c4\u03c5\u03c0\u03b1 \u03c4\u03b7\u03c2 \u03b2\u03b9\u03bf\u03bb\u03bf\u03b3\u03b9\u03ba\u03ae\u03c2 \u03b3\u03b5\u03c9\u03c1\u03b3\u03af\u03b1\u03c2 \u03ba\u03b1\u03b9 \u03c0\u03b5\u03c1\u03b9\u03bb\u03b1\u03bc\u03b2\u03ac\u03bd\u03bf\u03c5\u03bd \u03c0\u03c1\u03b1\u03ba\u03c4\u03b9\u03ba\u03ad\u03c2 \u03c0\u03bf\u03c5 
\u03ba\u03c5\u03ba\u03bb\u03bf\u03c6\u03bf\u03c1\u03bf\u03cd\u03bd \u03c0\u03cc\u03c1\u03bf\u03c5\u03c2, \u03c0\u03c1\u03bf\u03c9\u03b8\u03bf\u03cd\u03bd \u03c4\u03b7\u03bd \u03bf\u03b9\u03ba\u03bf\u03bb\u03bf\u03b3\u03b9\u03ba\u03ae \u03b9\u03c3\u03bf\u03c1\u03c1\u03bf\u03c0\u03af\u03b1 \u03ba\u03b1\u03b9 \u03b4\u03b9\u03b1\u03c4\u03b7\u03c1\u03bf\u03cd\u03bd \u03c4\u03b7 \u03b2\u03b9\u03bf\u03c0\u03bf\u03b9\u03ba\u03b9\u03bb\u03cc\u03c4\u03b7\u03c4\u03b1.", "et": "\u00d6koloogiline toit on toit, mis on toodetud meetoditega, mis vastavad \u00f6koloogilise p\u00f5llumajanduse standarditele ja h\u00f5lmavad tavasid, mis ts\u00fcklivad ressursse, soodustavad \u00f6koloogilist tasakaalu ja s\u00e4ilitavad bioloogilist mitmekesisust.", "hr": "Organska hrana je hrana koja se proizvodi metodom koja se pridr\u017eava standarda organske poljoprivrede i uklju\u010duje prakse koje recikliraju resurse, promi\u010du ekolo\u0161ku ravnote\u017eu i o\u010duvaju biolo\u0161ku raznolikost.", "sk": "Organick\u00e9 potraviny s\u00fa potraviny, ktor\u00e9 sa vyr\u00e1baj\u00fa met\u00f3dami, ktor\u00e9 sp\u013a\u0148aj\u00fa \u0161tandardy ekologickej po\u013enohospod\u00e1rskej v\u00fdroby a zah\u0155\u0148aj\u00fa postupy, ktor\u00e9 cyklizuj\u00fa zdroje, podporuj\u00fa ekologick\u00fa rovnov\u00e1hu a zachov\u00e1vaj\u00fa biodiverzitu.", "ro": "Alimentele organice sunt alimente produse prin metode care respect\u0103 standardele agriculturii organice \u0219i includ practici care ciclizeaz\u0103 resursele, promoveaz\u0103 echilibrul ecologic \u0219i conserv\u0103 biodiversitatea.", "lt": "Ekologi\u0161ka maistas yra maistas, gaminamas naudojant metodik\u0105, atitinkan\u010di\u0105 ekologi\u0161ko \u017eem\u0117s \u016bkio standartus, ir apima praktikas, kurios cikli\u0161kai naudoja i\u0161teklius, skatina ekologin\u012f pusiausvyr\u0105 ir i\u0161laiko biologin\u0119 \u012fvairov\u0119.", "it": "Il cibo biologico \u00e8 prodotto mediante metodi che rispettano gli standard 
dell'agricoltura biologica e promuove pratiche che favoriscono il riciclo delle risorse, il bilancio ecologico e la conservazione della biodiversit\u00e0.", "tr": "Organik g\u0131da, organik tar\u0131m standartlar\u0131na uygun y\u00f6ntemlerle \u00fcretilen ve kaynak d\u00f6ng\u00fcs\u00fc, ekolojik dengeyi te\u015fvik etme ve biyolojik \u00e7e\u015fitlili\u011fi koruma uygulamalar\u0131n\u0131 i\u00e7eren g\u0131dalard\u0131r.", "es": "Se denomina alimento org\u00e1nico, alimento ecol\u00f3gico o alimento biol\u00f3gico al producto agr\u00edcola o agroindustrial que se produce bajo un conjunto de procedimientos denominados \u201cecol\u00f3gicos\u201d: evitan el uso de productos sint\u00e9ticos, como pesticidas, herbicidas y fertilizantes artificiales.", "fr": "Un aliment biologique est un aliment produit suivant les principes de l'agriculture biologique. L'agriculture biologique est une m\u00e9thode de production agricole qui exclut le recours \u00e0 la plupart des produits chimiques de synth\u00e8se, les organismes g\u00e9n\u00e9tiquement modifi\u00e9s par transg\u00e9n\u00e8se et la conservation des cultures par irradiation.", "en": "Organic food is food produced by methods complying with the standards of organic farming and features practices that cycle resources, promote ecological balance, and conserve biodiversity."}, "incompatible_with": {"en": "labels:en:non-organic"}}, "en:nutriscore": {"label_categories": {"en": "en:Nutrition Grades, en:Health"}, "name": {"fr": "Nutriscore", "en": "Nutriscore", "ca": "Comptador Nutricional", "es": "Nutriscore", "nl": "Nutriscore", "it": "Nutriscore", "he": "Nutriscore", "hr": "Nutriscore", "xx": "Nutriscore", "bg": "Nutriscore", "de": "Nutriscore", "da": "Nutriscore", "pt": "Nutriscore", "fi": "Nutriscore", "pl": "Nutriscore"}, "synonyms": {"hr": ["Nutriscore"], "xx": ["Nutriscore"], "it": ["Nutriscore"], "he": ["Nutriscore"], "ca": ["Comptador Nutricional", "Nutriscore"], "es": ["Nutriscore"], "nl": ["Nutriscore"], 
"fr": ["Nutriscore"], "en": ["Nutriscore"], "da": ["Nutriscore"], "pt": ["Nutriscore", "Nutri-Score"], "fi": ["Nutriscore"], "pl": ["Nutriscore"], "de": ["Nutriscore"], "bg": ["Nutriscore"]}, "wikidata": {"en": "Q46577569"}, "children": ["en:nutriscore-grade-a", "en:nutriscore-grade-a-new-calculation", "en:nutriscore-grade-b", "en:nutriscore-grade-b-new-calculation", "en:nutriscore-grade-c", "en:nutriscore-grade-c-new-calculation", "en:nutriscore-grade-d", "en:nutriscore-grade-d-new-calculation", "en:nutriscore-grade-e", "en:nutriscore-grade-e-new-calculation"]}, "en:non-eu-agriculture": {"synonyms": {"fi": ["tuotettu EU:n ulkopuolella"], "pt": ["Agricultura n\u00e3o-UE", "Agricultura n\u00e3o Uni\u00e3o Europeia"], "da": ["ikke-EU jordbruk"], "bg": ["\u0417\u0435\u043c\u0435\u0434\u0435\u043b\u0438\u0435 \u0438\u0437\u0432\u044a\u043d \u0415\u0421"], "de": ["Nicht EU-Landwirtschaft"], "he": ["\u05d7\u05e7\u05dc\u05d0\u05d5\u05ea \u05de\u05d7\u05d5\u05e5 \u05dc\u05d0\u05d9\u05d7\u05d5\u05d3 \u05d4\u05d0\u05d9\u05e8\u05d5\u05e4\u05d9"], "it": ["Agricoltura non-UE"], "cs": ["Zem\u011bd\u011blstv\u00ed mimo EU"], "ru": ["\u043d\u0435-EU \u0430\u0433\u0440\u043e\u043a\u0443\u043b\u044c\u0442\u0443\u0440\u0430"], "hr": ["poljoprivreda izvan EU"], "sv": ["icke EU-jordbruk"], "en": ["non-EU Agriculture"], "hu": ["nem-EU mez\u0151gazdas\u00e1g"], "fr": ["Agriculture non UE"], "nl": ["Non EU landbouw", "niet-EU landbouw"], "es": ["Agricultura no UE"], "ca": ["Agricultura No-UE"]}, "name": {"nl": "Non EU landbouw", "ca": "Agricultura No-UE", "es": "Agricultura no UE", "en": "Non-EU Agriculture", "hu": "Nem-EU mez\u0151gazdas\u00e1g", "fr": "Agriculture non UE", "ru": "\u041d\u0435-EU \u0430\u0433\u0440\u043e\u043a\u0443\u043b\u044c\u0442\u0443\u0440\u0430", "cs": "Zem\u011bd\u011blstv\u00ed mimo EU", "sv": "Icke EU-jordbruk", "hr": "Poljoprivreda izvan EU", "he": "\u05d7\u05e7\u05dc\u05d0\u05d5\u05ea \u05de\u05d7\u05d5\u05e5 \u05dc\u05d0\u05d9\u05d7\u05d5\u05d3 
\u05d4\u05d0\u05d9\u05e8\u05d5\u05e4\u05d9", "it": "Agricoltura non-UE", "de": "Nicht EU-Landwirtschaft", "bg": "\u0417\u0435\u043c\u0435\u0434\u0435\u043b\u0438\u0435 \u0438\u0437\u0432\u044a\u043d \u0415\u0421", "pt": "Agricultura n\u00e3o-UE", "fi": "Tuotettu EU:n ulkopuolella", "da": "Ikke-EU jordbruk"}, "children": ["en:bo-bio-126", "en:eu-non-eu-agriculture"]}, "en:nutriscore-grade-c": {"label_categories": {"en": "en:Nutrition Grades, en:Health"}, "parents": ["en:nutriscore"], "name": {"he": "\u05d3\u05d9\u05e8\u05d5\u05d2 C \u05d1\u05beNutriscore", "it": "Nutriscore C", "xx": "Nutriscore C", "hr": "Nutriscore oznaka C", "en": "Nutriscore Grade C", "fr": "Nutriscore C", "nl": "Nutriscore C", "ca": "Grau C Nutriscore", "es": "Nutriscore C", "pt": "Nutri-Score grau C", "fi": "Nutriscore-arvosana C", "da": "Nutriscore C", "bg": "Nutriscore C", "de": "Nutriscore C"}, "synonyms": {"ca": ["Grau C Nutriscore"], "es": ["Nutriscore C"], "nl": ["Nutriscore C"], "fr": ["Nutriscore C"], "en": ["Nutriscore Grade C"], "xx": ["Nutriscore C"], "hr": ["Nutriscore oznaka C"], "it": ["Nutriscore C"], "he": ["\u05d3\u05d9\u05e8\u05d5\u05d2 C \u05d1\u05beNutriscore"], "de": ["Nutriscore C"], "bg": ["Nutriscore C"], "da": ["Nutriscore C"], "pt": ["Nutri-Score grau C"], "fi": ["Nutriscore-arvosana C"]}}, "en:no-artificial-flavors": {"synonyms": {"pl": ["Bez sztucznych aromat\u00f3w"], "fi": ["ei keinotekoisia aromeja", "ilman keinotekoisia aromeja"], "pt": ["Sem sabores artificiais", "livre de sabor artificial", "livre de sabores artificiais", "sem aromas artificiais"], "th": ["\u0e44\u0e21\u0e48\u0e40\u0e15\u0e34\u0e21\u0e01\u0e25\u0e34\u0e48\u0e19\u0e2a\u0e31\u0e07\u0e40\u0e04\u0e23\u0e32\u0e30\u0e2b\u0e4c"], "de": ["Ohne k\u00fcnstliche Aromen"], "bg": ["\u0411\u0435\u0437 \u0438\u0437\u043a\u0443\u0441\u0442\u0432\u0435\u043d\u0438 \u0430\u0440\u043e\u043c\u0430\u0442\u0438"], "ru": ["\u0431\u0435\u0437 
\u0438\u0441\u043a\u0443\u0441\u0441\u0442\u0432\u0435\u043d\u043d\u044b\u0445 \u0430\u0440\u043e\u043c\u0430\u0442\u0438\u0437\u0430\u0442\u043e\u0440\u043e\u0432", "\u0431\u0435\u0437 \u0434\u043e\u0431\u0430\u0432\u043b\u0435\u043d\u0438\u044f \u0438\u0441\u043a\u0443\u0441\u0441\u0442\u0432\u0435\u043d\u043d\u044b\u0445 \u0430\u0440\u043e\u043c\u0430\u0442\u0438\u0437\u0430\u0442\u043e\u0440\u043e\u0432"], "hr": ["Bez umjetnih aroma"], "sv": ["Inga artificiella arom\u00e4mnen"], "he": ["\u05dc\u05dc\u05d0 \u05d7\u05d5\u05de\u05e8\u05d9 \u05d8\u05e2\u05dd \u05de\u05dc\u05d0\u05db\u05d5\u05ea\u05d9\u05d9\u05dd"], "it": ["Senza aromi artificiali"], "nl": ["Vrij van kunstmatige smaakstoffen", "Zonder kunstmatige smaakstoffen"], "es": ["Sin aromas artificiales"], "ca": ["Sense aromes artificials", "Lliure d'aromes artificials"], "en": ["No artificial flavors", "free of artificial flavor", "No artificial flavours"], "hu": ["Mesters\u00e9ges \u00edzes\u00edt\u0151k n\u00e9lk\u00fcl", "Mesters\u00e9ges arom\u00e1k n\u00e9lk\u00fcl", "Mesters\u00e9ges \u00edzes\u00edt\u0151t nem tartalmaz", "Mesters\u00e9ges arom\u00e1t nem tartalmaz", "Mesters\u00e9ges \u00edzes\u00edt\u0151 n\u00e9lk\u00fcl", "Mesters\u00e9ges aroma n\u00e9lk\u00fcl"], "fr": ["Sans ar\u00f4mes artificiels", "sans ar\u00f4me artificiel", "sans aromatisants artificiels"]}, "name": {"bg": "\u0411\u0435\u0437 \u0438\u0437\u043a\u0443\u0441\u0442\u0432\u0435\u043d\u0438 \u0430\u0440\u043e\u043c\u0430\u0442\u0438", "de": "Ohne k\u00fcnstliche Aromen", "th": "\u0e44\u0e21\u0e48\u0e40\u0e15\u0e34\u0e21\u0e01\u0e25\u0e34\u0e48\u0e19\u0e2a\u0e31\u0e07\u0e40\u0e04\u0e23\u0e32\u0e30\u0e2b\u0e4c", "pt": "Sem sabores artificiais", "pl": "Bez sztucznych aromat\u00f3w", "fi": "Ei keinotekoisia aromeja", "en": "No artificial flavors", "hu": "Mesters\u00e9ges \u00edzes\u00edt\u0151k n\u00e9lk\u00fcl", "fr": "Sans ar\u00f4mes artificiels", "nl": "Vrij van kunstmatige smaakstoffen", "ca": "Sense aromes artificials", 
"es": "Sin aromas artificiales", "he": "\u05dc\u05dc\u05d0 \u05d7\u05d5\u05de\u05e8\u05d9 \u05d8\u05e2\u05dd \u05de\u05dc\u05d0\u05db\u05d5\u05ea\u05d9\u05d9\u05dd", "it": "Senza aromi artificiali", "ru": "\u0411\u0435\u0437 \u0438\u0441\u043a\u0443\u0441\u0441\u0442\u0432\u0435\u043d\u043d\u044b\u0445 \u0430\u0440\u043e\u043c\u0430\u0442\u0438\u0437\u0430\u0442\u043e\u0440\u043e\u0432", "sv": "Inga artificiella arom\u00e4mnen", "hr": "Bez umjetnih aroma"}, "children": ["en:no-artificial-colours-or-flavours", "en:no-artificial-flavourings-or-sweeteners"]}, "en:no-added-sugar": {"description": {"en": "In EU, a claim that a food is with no added sugars may only be made where the product does not contain any added mono- or disaccharides or any other food used for its sweetening properties. If sugars are naturally present in the food, the following indication should also appear on the label: \u2018CONTAINS NATURALLY OCCURRING SUGARS'. -see: https://eur-lex.europa.eu/legal-content/EN/TXT/?uri=CELEX%3A02006R1924-20141213-"}, "synonyms": {"bg": ["\u0411\u0435\u0437 \u0434\u043e\u0431\u0430\u0432\u0435\u043d\u0430 \u0437\u0430\u0445\u0430\u0440"], "th": ["\u0e44\u0e21\u0e48\u0e40\u0e15\u0e34\u0e21\u0e19\u0e49\u0e33\u0e15\u0e32\u0e25"], "de": ["Ohne Zuckerzusatz", "Ohne zugesetzten Zucker"], "fi": ["ei lis\u00e4tty\u00e4 sokeria", "ilman lis\u00e4tty\u00e4 sokeria", "ei lis\u00e4ttyj\u00e4 sokereita"], "pl": ["Bez dodatku cukru"], "pt": ["Sem a\u00e7\u00facares adicionados", "sem adi\u00e7\u00e3o de a\u00e7\u00facares", "sem a\u00e7\u00facar adicionado", "sem adi\u00e7\u00e3o de a\u00e7\u00facar"], "fr": ["Sans sucre ajout\u00e9", "sans sucres ajout\u00e9s", "sans ajout de sucre"], "en": ["No added sugar", "no sugar added", "no added sugars"], "hu": ["Hozz\u00e1adott cukor n\u00e9lk\u00fcl", "Nincs hozz\u00e1adott cukor", "hozz\u00e1adott cukrot nem tartalmaz"], "zh": ["\u65e0\u6dfb\u7cd6"], "es": ["Sin az\u00facares a\u00f1adidos", "0% Az\u00facares a\u00f1adidos", "Sin 
az\u00facar a\u00f1adido"], "ca": ["Sense sucre afegit", "sense sucres afegits"], "nl": ["Zonder toegevoegde suikers", "Geen suiker toegevoegd"], "it": ["Senza zuccheri aggiunti"], "he": ["\u05dc\u05dc\u05d0 \u05ea\u05d5\u05e1\u05e4\u05ea \u05e1\u05d5\u05db\u05e8"], "xx": ["no added sugar"], "hr": ["bez dodanog \u0161e\u0107era", "bez dodatnih \u0161e\u0107era", "bez dodanih \u0161e\u0107era", "sadr\u017ei prirodno prisutne \u0161e\u0107ere"], "sv": ["Inget tillsat socker", "Utan tillsatt socker"], "cs": ["Bez p\u0159idan\u00e9ho cukru"]}, "name": {"he": "\u05dc\u05dc\u05d0 \u05ea\u05d5\u05e1\u05e4\u05ea \u05e1\u05d5\u05db\u05e8", "it": "Senza zuccheri aggiunti", "cs": "Bez p\u0159idan\u00e9ho cukru", "sv": "Inget tillsat socker", "xx": "No added sugar", "hr": "Bez dodanog \u0161e\u0107era", "zh": "\u65e0\u6dfb\u7cd6", "en": "No added sugar", "hu": "Hozz\u00e1adott cukor n\u00e9lk\u00fcl", "fr": "Sans sucre ajout\u00e9", "nl": "Zonder toegevoegde suikers", "ca": "Sense sucre afegit", "es": "Sin az\u00facares a\u00f1adidos", "pt": "Sem a\u00e7\u00facares adicionados", "fi": "Ei lis\u00e4tty\u00e4 sokeria", "pl": "Bez dodatku cukru", "bg": "\u0411\u0435\u0437 \u0434\u043e\u0431\u0430\u0432\u0435\u043d\u0430 \u0437\u0430\u0445\u0430\u0440", "de": "Ohne Zuckerzusatz", "th": "\u0e44\u0e21\u0e48\u0e40\u0e15\u0e34\u0e21\u0e19\u0e49\u0e33\u0e15\u0e32\u0e25"}}, "en:green-dot": {"label_categories": {"en": "en:Packaging"}, "name": {"de": "Gr\u00fcner Punkt", "th": "\u0e01\u0e23\u0e35\u0e19\u0e14\u0e2d\u0e17", "nb": "Gr\u00f8nt Punkt", "bg": "\u0417\u0435\u043b\u0435\u043d\u0430 \u0442\u043e\u0447\u043a\u0430", "pt": "Ponto Verde", "fi": "Vihre\u00e4 piste", "ca": "Punt verd", "es": "Punto Verde", "nl": "Groene Punt", "fr": "Point Vert", "en": "Green Dot", "hu": "Z\u00f6ld Pont", "cs": "Zelen\u00fd bod", "it": "Punto Verde", "he": "\u05e0\u05e7\u05d5\u05d3\u05d4 \u05d9\u05e8\u05d5\u05e7\u05d4"}, "synonyms": {"fr": ["Point Vert", "Le Point Vert"], "hu": ["Z\u00f6ld Pont", 
"Green Dot"], "en": ["Green Dot", "The Green Dot"], "ca": ["Punt verd"], "es": ["Punto Verde", "El Punto Verde"], "nl": ["Groene Punt"], "it": ["Punto Verde", "Il Punto Verde"], "he": ["\u05e0\u05e7\u05d5\u05d3\u05d4 \u05d9\u05e8\u05d5\u05e7\u05d4", "\u05d4\u05e0\u05e7\u05d5\u05d3\u05d4 \u05d4\u05d9\u05e8\u05d5\u05e7\u05d4"], "cs": ["Zelen\u00fd bod"], "bg": ["\u0417\u0435\u043b\u0435\u043d\u0430 \u0442\u043e\u0447\u043a\u0430"], "th": ["\u0e01\u0e23\u0e35\u0e19\u0e14\u0e2d\u0e17"], "de": ["Gr\u00fcner Punkt", "Gr\u00fcne Punkt", "Der Gr\u00fcne Punkt"], "nb": ["Gr\u00f8nt Punkt"], "pt": ["Ponto Verde", "Green Dot", "The Green Dot"], "fi": ["vihre\u00e4 piste"]}, "wikidata": {"en": "Q975485"}}, "en:eu-organic": {"wikidata": {"en": "Q380448"}, "children": ["de:eg-\u00f6ko-verordnung", "en:ad-bio-154", "en:ae-bio-102", "en:ae-bio-115", "en:ae-bio-134", "en:ae-bio-135", "en:ae-bio-140", "en:ae-bio-141", "en:ae-bio-143", "en:ae-bio-152", "en:ae-bio-154", "en:ae-bio-161", "en:ae-bio-171", "en:ae-bio-177", "en:ae-bio-179", "en:af-bio-102", "en:af-bio-112", "en:af-bio-135", "en:af-bio-143", "en:af-bio-147", "en:af-bio-149", "en:af-bio-154", "en:af-bio-161", "en:af-bio-177", "en:al-bio-102", "en:al-bio-115", "en:al-bio-132", "en:al-bio-139", "en:al-bio-140", "en:al-bio-141", "en:al-bio-143", "en:al-bio-149", "en:al-bio-154", "en:al-bio-161", "en:al-bio-171", "en:al-bio-179", "en:am-bio-102", "en:am-bio-108", "en:am-bio-112", "en:am-bio-115", "en:am-bio-139", "en:am-bio-140", "en:am-bio-141", "en:am-bio-143", "en:am-bio-149", "en:am-bio-154", "en:am-bio-161", "en:am-bio-171", "en:ao-bio-141", "en:ao-bio-149", "en:ao-bio-172", "en:ar-bio-110", "en:ar-bio-123", "en:ar-bio-135", "en:ar-bio-154", "en:at-bio-004", "en:at-bio-301", "en:at-bio-302", "en:at-bio-401", "en:at-bio-402", "en:at-bio-501", "en:at-bio-701", "en:at-bio-901", "en:at-bio-902", "en:au-bio-107", "en:au-bio-119", "en:au-bio-149", "en:az-bio-102", "en:az-bio-108", "en:az-bio-134", "en:az-bio-135", 
"en:az-bio-140", "en:az-bio-141", "en:az-bio-149", "en:az-bio-151", "en:az-bio-154", "en:az-bio-161", "en:az-bio-166", "en:az-bio-171", "en:ba-bio-101", "en:ba-bio-134", "en:ba-bio-139", "en:ba-bio-143", "en:ba-bio-151", "en:ba-bio-154", "en:ba-bio-161", "en:ba-bio-162", "en:ba-bio-166", "en:ba-bio-181", "en:bd-bio-132", "en:bd-bio-134", "en:bd-bio-141", "en:bd-bio-143", "en:bd-bio-147", "en:bd-bio-149", "en:bd-bio-152", "en:bd-bio-154", "en:bd-bio-177", "en:be-bio-01", "en:be-bio-02", "en:be-bio-03", "en:be-bio-04", "en:bf-bio-102", "en:bf-bio-128", "en:bf-bio-134", "en:bf-bio-140", "en:bf-bio-143", "en:bf-bio-149", "en:bf-bio-151", "en:bf-bio-154", "en:bf-bio-160", "en:bf-bio-161", "en:bg-bio-02", "en:bg-bio-03", "en:bg-bio-04", "en:bg-bio-05", "en:bg-bio-06", "en:bg-bio-07", "en:bg-bio-08", "en:bg-bio-10", "en:bg-bio-12", "en:bg-bio-13", "en:bg-bio-14", "en:bg-bio-15", "en:bg-bio-16", "en:bg-bio-17", "en:bg-bio-18", "en:bg-bio-19", "en:bg-bio-20", "en:bg-bio-21", "en:bg-bio-22", "en:bh-bio-154", "en:bi-bio-128", "en:bi-bio-143", "en:bi-bio-149", "en:bi-bio-154", "en:bj-bio-102", "en:bj-bio-128", "en:bj-bio-140", "en:bj-bio-141", "en:bj-bio-149", "en:bj-bio-152", "en:bj-bio-154", "en:bj-bio-161", "en:bj-bio-177", "en:bm-bio-149", "en:bn-bio-143", "en:bn-bio-149", "en:bo-bio-110", "en:bo-bio-116", "en:bo-bio-118", "en:bo-bio-122", "en:bo-bio-123", "en:bo-bio-132", "en:bo-bio-135", "en:bo-bio-140", "en:bo-bio-141", "en:bo-bio-143", "en:bo-bio-144", "en:bo-bio-149", "en:bo-bio-151", "en:bo-bio-178", "en:br-bio-110", "en:br-bio-122", "en:br-bio-123", "en:br-bio-132", "en:br-bio-134", "en:br-bio-135", "en:br-bio-140", "en:br-bio-141", "en:br-bio-149", "en:br-bio-154", "en:br-bio-161", "en:bs-bio-142", "en:bs-bio-143", "en:bs-bio-144", "en:bs-bio-154", "en:bt-bio-134", "en:bt-bio-140", "en:bt-bio-141", "en:bt-bio-147", "en:bt-bio-149", "en:bt-bio-171", "en:bt-bio-177", "en:bt-bio-180", "en:bw-bio-141", "en:bw-bio-149", "en:bw-bio-154", "en:by-bio-102", "en:by-bio-108", 
"en:by-bio-112", "en:by-bio-135", "en:by-bio-140", "en:by-bio-141", "en:by-bio-149", "en:by-bio-154", "en:by-bio-170", "en:by-bio-171", "en:by-bio-173", "en:bz-bio-123", "en:bz-bio-135", "en:bz-bio-142", "en:bz-bio-154", "en:bz-bio-169", "en:ca-bio-149", "en:cd-bio-128", "en:cd-bio-140", "en:cd-bio-143", "en:cd-bio-149", "en:cd-bio-154", "en:cf-bio-154", "en:cg-bio-154", "en:ch-bio-004", "en:ch-bio-006", "en:ch-bio-038", "en:ch-bio-086", "en:ch-bio-149", "en:ci-bio-102", "en:ci-bio-115", "en:ci-bio-128", "en:ci-bio-135", "en:ci-bio-141", "en:ci-bio-143", "en:ci-bio-149", "en:ci-bio-154", "en:ci-bio-160", "en:ci-bio-161", "en:ck-bio-107", "en:ck-bio-149", "en:cl-bio-110", "en:cl-bio-116", "en:cl-bio-123", "en:cl-bio-134", "en:cl-bio-140", "en:cl-bio-141", "en:cl-bio-143", "en:cl-bio-144", "en:cl-bio-149", "en:cl-bio-154", "en:cl-bio-171", "en:cm-bio-102", "en:cm-bio-132", "en:cm-bio-140", "en:cm-bio-142", "en:cm-bio-143", "en:cm-bio-149", "en:cm-bio-151", "en:cm-bio-154", "en:cm-bio-160", "en:cn-bio-102", "en:cn-bio-107", "en:cn-bio-119", "en:cn-bio-122", "en:cn-bio-132", "en:cn-bio-140", "en:cn-bio-141", "en:cn-bio-143", "en:cn-bio-144", "en:cn-bio-145", "en:cn-bio-149", "en:cn-bio-152", "en:cn-bio-154", "en:cn-bio-161", "en:cn-bio-171", "en:cn-bio-181", "en:co-bio-104", "en:co-bio-110", "en:co-bio-118", "en:co-bio-122", "en:co-bio-123", "en:co-bio-135", "en:co-bio-140", "en:co-bio-141", "en:co-bio-142", "en:co-bio-143", "en:co-bio-144", "en:co-bio-149", "en:co-bio-151", "en:co-bio-154", "en:co-bio-169", "en:co-bio-178", "en:cr-bio-123", "en:cr-bio-135", "en:cr-bio-141", "en:cr-bio-144", "en:cr-bio-149", "en:cu-bio-123", "en:cu-bio-134", "en:cu-bio-141", "en:cu-bio-149", "en:cu-bio-151", "en:cu-bio-154", "en:cu-bio-161", "en:cv-bio-149", "en:cv-bio-151", "en:cv-bio-154", "en:cw-bio-149", "en:cw-bio-160", "en:cy-bio-001", "en:cy-bio-002", "en:cz-bio-001", "en:cz-bio-002", "en:cz-bio-003", "en:cz-bio-004", "en:de-oko-001", "en:de-oko-003", "en:de-oko-005", 
"en:de-oko-006", "en:de-oko-007", "en:de-oko-009", "en:de-oko-012", "en:de-oko-013", "en:de-oko-021", "en:de-oko-022", "en:de-oko-024", "en:de-oko-034", "en:de-oko-037", "en:de-oko-039", "en:de-oko-044", "en:de-oko-060", "en:de-oko-064", "en:de-oko-070", "en:dj-bio-149", "en:dk-\u00f8ko-050", "en:dk-\u00f8ko-100", "en:dk-\u00f8ko-200", "en:dk-\u00f8ko-300", "en:do-bio-104", "en:do-bio-123", "en:do-bio-134", "en:do-bio-135", "en:do-bio-140", "en:do-bio-141", "en:do-bio-143", "en:do-bio-144", "en:do-bio-149", "en:do-bio-150", "en:do-bio-151", "en:do-bio-154", "en:do-bio-161", "en:do-bio-169", "en:do-bio-171", "en:do-bio-178", "en:dz-bio-102", "en:dz-bio-141", "en:dz-bio-142", "en:dz-bio-149", "en:dz-bio-154", "en:dz-bio-161", "en:ec-bio-110", "en:ec-bio-115", "en:ec-bio-122", "en:ec-bio-123", "en:ec-bio-132", "en:ec-bio-135", "en:ec-bio-140", "en:ec-bio-141", "en:ec-bio-143", "en:ec-bio-144", "en:ec-bio-149", "en:ec-bio-151", "en:ec-bio-154", "en:ec-bio-171", "en:ec-bio-178", "en:ee-oko-01", "en:ee-oko-02", "en:eg-bio-102", "en:eg-bio-135", "en:eg-bio-140", "en:eg-bio-141", "en:eg-bio-149", "en:eg-bio-150", "en:eg-bio-151", "en:eg-bio-152", "en:eg-bio-154", "en:eg-bio-160", "en:eg-bio-164", "en:eg-bio-171", "en:eg-bio-172", "en:eg-bio-177", "en:eg-bio-179", "en:er-bio-149", "en:es-ec0-017-an", "en:es-eco-001", "en:es-eco-001-an", "en:es-eco-001-cl", "en:es-eco-001-cm", "en:es-eco-002", "en:es-eco-002-an", "en:es-eco-002-ar", "en:es-eco-002-cl", "en:es-eco-002-cm", "en:es-eco-003-an", "en:es-eco-004", "en:es-eco-004-an", "en:es-eco-004-ar", "en:es-eco-005", "en:es-eco-005-an", "en:es-eco-005-ar", "en:es-eco-006-ar", "en:es-eco-007-ar", "en:es-eco-008-ar", "en:es-eco-009-ar", "en:es-eco-010-an", "en:es-eco-010-ar", "en:es-eco-011-an", "en:es-eco-011-ar", "en:es-eco-011-cm", "en:es-eco-012-as", "en:es-eco-013-ib", "en:es-eco-014-ic", "en:es-eco-015-cn", "en:es-eco-016-cl", "en:es-eco-017", "en:es-eco-017-cl", "en:es-eco-017-cm", "en:es-eco-019-ct", "en:es-eco-020-cv", 
"en:es-eco-021-ex", "en:es-eco-022-ga", "en:es-eco-023-ma", "en:es-eco-024-mu", "en:es-eco-025-na", "en:es-eco-026-vas", "en:es-eco-027-ri", "en:es-eco-028-an", "en:es-eco-028-ar", "en:es-eco-028-cl", "en:es-eco-028-cm", "en:es-eco-029-an", "en:es-eco-029-ar", "en:es-eco-029-cl", "en:es-eco-030-an", "en:es-eco-031-cl", "en:et-bio-102", "en:et-bio-115", "en:et-bio-134", "en:et-bio-135", "en:et-bio-140", "en:et-bio-141", "en:et-bio-143", "en:et-bio-149", "en:et-bio-151", "en:et-bio-152", "en:et-bio-154", "en:et-bio-161", "en:et-bio-171", "en:et-bio-177", "en:fi-eko-101", "en:fi-eko-102", "en:fi-eko-103", "en:fi-eko-104", "en:fi-eko-105", "en:fi-eko-106", "en:fi-eko-107", "en:fi-eko-108", "en:fi-eko-109", "en:fi-eko-110", "en:fi-eko-111", "en:fi-eko-112", "en:fi-eko-113", "en:fi-eko-114", "en:fi-eko-115", "en:fi-eko-201", "en:fi-eko-301", "en:fi-eko-401", "en:fj-bio-107", "en:fj-bio-130", "en:fj-bio-132", "en:fj-bio-141", "en:fj-bio-149", "en:fj-bio-151", "en:fj-bio-154", "en:fk-bio-107", "en:fr-bio-01", "en:fr-bio-07", "en:fr-bio-09", "en:fr-bio-10", "en:fr-bio-12", "en:fr-bio-13", "en:fr-bio-15", "en:fr-bio-16", "en:gb-org-01", "en:gb-org-02", "en:gb-org-03", "en:gb-org-04", "en:gb-org-05", "en:gb-org-06", "en:gb-org-07", "en:gb-org-09", "en:gb-org-13", "en:gb-org-16", "en:gb-org-17", "en:gd-bio-140", "en:gd-bio-149", "en:gd-bio-171", "en:ge-bio-102", "en:ge-bio-108", "en:ge-bio-117", "en:ge-bio-141", "en:ge-bio-149", "en:ge-bio-151", "en:ge-bio-154", "en:ge-bio-161", "en:ge-bio-166", "en:ge-bio-171", "en:ge-bio-177", "en:gh-bio-102", "en:gh-bio-128", "en:gh-bio-134", "en:gh-bio-140", "en:gh-bio-141", "en:gh-bio-142", "en:gh-bio-143", "en:gh-bio-149", "en:gh-bio-151", "en:gh-bio-152", "en:gh-bio-154", "en:gh-bio-161", "en:gh-bio-171", "en:gm-bio-141", "en:gm-bio-143", "en:gm-bio-149", "en:gm-bio-154", "en:gn-bio-149", "en:gn-bio-154", "en:gq-bio-154", "en:gr-bio-01", "en:gr-bio-02", "en:gr-bio-03", "en:gr-bio-04", "en:gr-bio-05", "en:gr-bio-06", "en:gr-bio-07", 
"en:gr-bio-08", "en:gr-bio-10", "en:gr-bio-12", "en:gr-bio-13", "en:gr-bio-14", "en:gr-bio-15", "en:gr-bio-16", "en:gr-bio-17", "en:gr-bio-18", "en:gt-bio-104", "en:gt-bio-118", "en:gt-bio-120", "en:gt-bio-123", "en:gt-bio-135", "en:gt-bio-140", "en:gt-bio-141", "en:gt-bio-143", "en:gt-bio-144", "en:gt-bio-149", "en:gt-bio-151", "en:gt-bio-154", "en:gt-bio-169", "en:gt-bio-178", "en:gw-bio-141", "en:gw-bio-154", "en:gw-bio-172", "en:gy-bio-123", "en:gy-bio-149", "en:gy-bio-154", "en:hk-bio-102", "en:hk-bio-107", "en:hk-bio-141", "en:hk-bio-142", "en:hk-bio-143", "en:hk-bio-149", "en:hk-bio-154", "en:hk-bio-174", "en:hn-bio-116", "en:hn-bio-118", "en:hn-bio-123", "en:hn-bio-135", "en:hn-bio-140", "en:hn-bio-141", "en:hn-bio-143", "en:hn-bio-144", "en:hn-bio-149", "en:hn-bio-151", "en:hn-bio-154", "en:hn-bio-169", "en:hn-bio-178", "en:hr-eko-01", "en:hr-eko-02", "en:hr-eko-03", "en:hr-eko-04", "en:hr-eko-05", "en:hr-eko-06", "en:hr-eko-07", "en:hr-eko-08", "en:hr-eko-09", "en:hr-eko-10", "en:hr-eko-11", "en:ht-bio-123", "en:ht-bio-141", "en:ht-bio-143", "en:ht-bio-149", "en:ht-bio-154", "en:hu-oko-01", "en:hu-oko-02", "en:id-bio-107", "en:id-bio-119", "en:id-bio-121", "en:id-bio-132", "en:id-bio-134", "en:id-bio-140", "en:id-bio-141", "en:id-bio-143", "en:id-bio-144", "en:id-bio-147", "en:id-bio-149", "en:id-bio-151", "en:id-bio-152", "en:id-bio-154", "en:id-bio-161", "en:id-bio-171", "en:id-bio-174", "en:id-bio-176", "en:ie-org-01", "en:ie-org-02", "en:ie-org-03", "en:ie-org-04", "en:ie-org-05", "en:il-bio-149", "en:in-bio-132", "en:in-bio-134", "en:in-bio-141", "en:in-bio-143", "en:in-bio-147", "en:in-bio-148", "en:in-bio-149", "en:in-bio-152", "en:in-bio-154", "en:in-bio-177", "en:in-bio-180", "en:iq-bio-102", "en:iq-bio-149", "en:ir-bio-102", "en:ir-bio-112", "en:ir-bio-115", "en:ir-bio-132", "en:ir-bio-135", "en:ir-bio-139", "en:ir-bio-140", "en:ir-bio-141", "en:ir-bio-143", "en:ir-bio-147", "en:ir-bio-149", "en:ir-bio-151", "en:ir-bio-154", "en:ir-bio-161", 
"en:ir-bio-166", "en:ir-bio-171", "en:it-bio-001-bz", "en:it-bio-002", "en:it-bio-002-bz", "en:it-bio-003", "en:it-bio-003-bz", "en:it-bio-004", "en:it-bio-005", "en:it-bio-006", "en:it-bio-007", "en:it-bio-008", "en:it-bio-009", "en:it-bio-010", "en:it-bio-011", "en:it-bio-012", "en:it-bio-013", "en:it-bio-014", "en:it-bio-015", "en:jm-bio-140", "en:jm-bio-144", "en:jm-bio-171", "en:jo-bio-102", "en:jo-bio-143", "en:jo-bio-149", "en:jo-bio-152", "en:jo-bio-154", "en:jo-bio-160", "en:jo-bio-171", "en:jo-bio-179", "en:jp-bio-115", "en:jp-bio-120", "en:jp-bio-141", "en:jp-bio-143", "en:jp-bio-145", "en:jp-bio-149", "en:jp-bio-154", "en:jp-bio-167", "en:ke-bio-134", "en:ke-bio-140", "en:ke-bio-141", "en:ke-bio-142", "en:ke-bio-143", "en:ke-bio-149", "en:ke-bio-151", "en:ke-bio-154", "en:ke-bio-161", "en:ke-bio-171", "en:kg-bio-102", "en:kg-bio-108", "en:kg-bio-112", "en:kg-bio-135", "en:kg-bio-140", "en:kg-bio-141", "en:kg-bio-149", "en:kg-bio-151", "en:kg-bio-154", "en:kg-bio-161", "en:kg-bio-166", "en:kg-bio-173", "en:kh-bio-132", "en:kh-bio-140", "en:kh-bio-141", "en:kh-bio-143", "en:kh-bio-148", "en:kh-bio-149", "en:kh-bio-151", "en:kh-bio-152", "en:kh-bio-154", "en:kh-bio-161", "en:kh-bio-174", "en:km-bio-102", "en:km-bio-154", "en:kr-bio-132", "en:kr-bio-141", "en:kr-bio-149", "en:kr-bio-154", "en:kr-bio-161", "en:kr-bio-174", "en:kw-bio-149", "en:kw-bio-154", "en:kw-bio-171", "en:ky-bio-135", "en:kz-bio-102", "en:kz-bio-108", "en:kz-bio-112", "en:kz-bio-115", "en:kz-bio-132", "en:kz-bio-134", "en:kz-bio-135", "en:kz-bio-139", "en:kz-bio-140", "en:kz-bio-141", "en:kz-bio-151", "en:kz-bio-154", "en:kz-bio-161", "en:kz-bio-166", "en:kz-bio-170", "en:kz-bio-171", "en:kz-bio-173", "en:la-bio-121", "en:la-bio-132", "en:la-bio-140", "en:la-bio-141", "en:la-bio-143", "en:la-bio-144", "en:la-bio-147", "en:la-bio-149", "en:la-bio-154", "en:la-bio-174", "en:lb-bio-102", "en:lb-bio-115", "en:lb-bio-149", "en:lb-bio-154", "en:lb-bio-161", "en:lb-bio-171", "en:lb-bio-179", 
"en:lc-bio-140", "en:li-bio-143", "en:li-bio-154", "en:li-bio-969", "en:lk-bio-115", "en:lk-bio-119", "en:lk-bio-132", "en:lk-bio-134", "en:lk-bio-141", "en:lk-bio-143", "en:lk-bio-147", "en:lk-bio-148", "en:lk-bio-149", "en:lk-bio-151", "en:lk-bio-152", "en:lk-bio-154", "en:lk-bio-169", "en:lk-bio-177", "en:lr-bio-141", "en:lr-bio-149", "en:lr-bio-154", "en:ls-bio-141", "en:ls-bio-149", "en:ls-bio-154", "en:lt-eko-001", "en:lu-bio-01", "en:lu-bio-04", "en:lu-bio-05", "en:lu-bio-06", "en:lu-bio-07", "en:lv-bio-01", "en:lv-bio-02", "en:ma-bio-102", "en:ma-bio-132", "en:ma-bio-134", "en:ma-bio-135", "en:ma-bio-140", "en:ma-bio-141", "en:ma-bio-143", "en:ma-bio-149", "en:ma-bio-151", "en:ma-bio-154", "en:ma-bio-160", "en:ma-bio-161", "en:ma-bio-165", "en:ma-bio-171", "en:ma-bio-178", "en:mc-bio-154", "en:mc-bio-165", "en:md-bio-102", "en:md-bio-108", "en:md-bio-115", "en:md-bio-135", "en:md-bio-139", "en:md-bio-140", "en:md-bio-141", "en:md-bio-151", "en:md-bio-154", "en:md-bio-161", "en:md-bio-171", "en:md-bio-173", "en:me-bio-101", "en:me-bio-139", "en:me-bio-140", "en:me-bio-141", "en:me-bio-151", "en:me-bio-154", "en:me-bio-161", "en:me-bio-162", "en:mg-bio-102", "en:mg-bio-107", "en:mg-bio-115", "en:mg-bio-134", "en:mg-bio-140", "en:mg-bio-143", "en:mg-bio-149", "en:mg-bio-151", "en:mg-bio-154", "en:mg-bio-160", "en:mg-bio-165", "en:mg-bio-181", "en:mk-bio-134", "en:mk-bio-139", "en:mk-bio-140", "en:mk-bio-141", "en:mk-bio-149", "en:mk-bio-151", "en:mk-bio-154", "en:mk-bio-157", "en:mk-bio-161", "en:mk-bio-162", "en:mk-bio-171", "en:ml-bio-102", "en:ml-bio-128", "en:ml-bio-134", "en:ml-bio-140", "en:ml-bio-143", "en:ml-bio-149", "en:ml-bio-151", "en:ml-bio-154", "en:ml-bio-160", "en:mm-bio-107", "en:mm-bio-121", "en:mm-bio-132", "en:mm-bio-140", "en:mm-bio-141", "en:mm-bio-143", "en:mm-bio-149", "en:mm-bio-152", "en:mm-bio-154", "en:mm-bio-174", "en:mm-bio-177", "en:mn-bio-122", "en:mn-bio-141", "en:mn-bio-149", "en:mn-bio-154", "en:mr-bio-154", "en:mt-org-01", 
"en:mu-bio-134", "en:mu-bio-149", "en:mu-bio-154", "en:mu-bio-165", "en:mu-bio-177", "en:mv-bio-147", "en:mv-bio-149", "en:mw-bio-140", "en:mw-bio-141", "en:mw-bio-142", "en:mw-bio-149", "en:mw-bio-154", "en:mx-bio-104", "en:mx-bio-105", "en:mx-bio-110", "en:mx-bio-113", "en:mx-bio-115", "en:mx-bio-116", "en:mx-bio-118", "en:mx-bio-120", "en:mx-bio-122", "en:mx-bio-123", "en:mx-bio-132", "en:mx-bio-134", "en:mx-bio-135", "en:mx-bio-140", "en:mx-bio-141", "en:mx-bio-143", "en:mx-bio-144", "en:mx-bio-149", "en:mx-bio-151", "en:mx-bio-154", "en:mx-bio-169", "en:mx-bio-178", "en:my-bio-107", "en:my-bio-115", "en:my-bio-119", "en:my-bio-121", "en:my-bio-130", "en:my-bio-132", "en:my-bio-140", "en:my-bio-141", "en:my-bio-143", "en:my-bio-144", "en:my-bio-147", "en:my-bio-149", "en:my-bio-152", "en:my-bio-154", "en:my-bio-174", "en:my-bio-177", "en:mz-bio-140", "en:mz-bio-141", "en:mz-bio-149", "en:mz-bio-152", "en:mz-bio-154", "en:mz-bio-172", "en:mz-bio-177", "en:na-bio-134", "en:na-bio-140", "en:na-bio-141", "en:na-bio-143", "en:na-bio-149", "en:na-bio-154", "en:nc-bio-154", "en:ne-bio-143", "en:ne-bio-149", "en:ne-bio-154", "en:ng-bio-102", "en:ng-bio-134", "en:ng-bio-140", "en:ng-bio-143", "en:ng-bio-149", "en:ng-bio-151", "en:ng-bio-152", "en:ng-bio-154", "en:ng-bio-177", "en:ni-bio-118", "en:ni-bio-120", "en:ni-bio-123", "en:ni-bio-140", "en:ni-bio-141", "en:ni-bio-143", "en:ni-bio-144", "en:ni-bio-149", "en:ni-bio-151", "en:ni-bio-154", "en:ni-bio-165", "en:ni-bio-169", "en:ni-bio-178", "en:nl-bio-01", "en:no-\u00f8ko-01", "en:np-bio-119", "en:np-bio-121", "en:np-bio-132", "en:np-bio-134", "en:np-bio-140", "en:np-bio-141", "en:np-bio-143", "en:np-bio-147", "en:np-bio-149", "en:np-bio-151", "en:np-bio-152", "en:np-bio-154", "en:np-bio-161", "en:np-bio-177", "en:np-bio-180", "en:nu-bio-130", "en:nz-bio-149", "en:om-bio-141", "en:om-bio-143", "en:om-bio-152", "en:om-bio-154", "en:om-bio-171", "en:om-bio-177", "en:pa-bio-110", "en:pa-bio-116", "en:pa-bio-118", 
"en:pa-bio-123", "en:pa-bio-135", "en:pa-bio-140", "en:pa-bio-141", "en:pa-bio-149", "en:pa-bio-154", "en:pa-bio-169", "en:pa-bio-178", "en:pe-bio-110", "en:pe-bio-118", "en:pe-bio-120", "en:pe-bio-122", "en:pe-bio-123", "en:pe-bio-135", "en:pe-bio-140", "en:pe-bio-141", "en:pe-bio-143", "en:pe-bio-144", "en:pe-bio-149", "en:pe-bio-151", "en:pe-bio-154", "en:pe-bio-169", "en:pe-bio-171", "en:pe-bio-178", "en:pe-bio-179", "en:pf-bio-132", "en:pf-bio-141", "en:pf-bio-149", "en:pf-bio-154", "en:pg-bio-107", "en:pg-bio-119", "en:pg-bio-140", "en:pg-bio-141", "en:pg-bio-147", "en:pg-bio-149", "en:pg-bio-151", "en:pg-bio-171", "en:ph-bio-102", "en:ph-bio-132", "en:ph-bio-140", "en:ph-bio-141", "en:ph-bio-143", "en:ph-bio-144", "en:ph-bio-147", "en:ph-bio-149", "en:ph-bio-151", "en:ph-bio-152", "en:ph-bio-154", "en:ph-bio-161", "en:ph-bio-171", "en:ph-bio-174", "en:ph-bio-177", "en:pk-bio-112", "en:pk-bio-135", "en:pk-bio-140", "en:pk-bio-141", "en:pk-bio-143", "en:pk-bio-147", "en:pk-bio-149", "en:pk-bio-154", "en:pk-bio-171", "en:pk-bio-177", "en:pl-eko-01", "en:pl-eko-02", "en:pl-eko-03", "en:pl-eko-04", "en:pl-eko-05", "en:pl-eko-06", "en:pl-eko-07", "en:pl-eko-08", "en:pl-eko-09", "en:pl-eko-10", "en:ps-bio-140", "en:ps-bio-143", "en:ps-bio-149", "en:ps-bio-154", "en:ps-bio-163", "en:pt-bio-01", "en:pt-bio-02", "en:pt-bio-03", "en:pt-bio-04", "en:pt-bio-05", "en:pt-bio-06", "en:pt-bio-07", "en:pt-bio-08", "en:pt-bio-09", "en:pt-bio-10", "en:pt-bio-11", "en:py-bio-110", "en:py-bio-122", "en:py-bio-123", "en:py-bio-132", "en:py-bio-135", "en:py-bio-140", "en:py-bio-141", "en:py-bio-143", "en:py-bio-149", "en:py-bio-151", "en:py-bio-154", "en:qa-bio-102", "en:qa-bio-152", "en:qa-bio-177", "en:ro-eco-001", "en:ro-eco-005", "en:ro-eco-007", "en:ro-eco-008", "en:ro-eco-009", "en:ro-eco-010", "en:ro-eco-014", "en:ro-eco-015", "en:ro-eco-016", "en:ro-eco-018", "en:ro-eco-021", "en:ro-eco-022", "en:ro-eco-023", "en:rs-bio-101", "en:rs-bio-102", "en:rs-bio-132", 
"en:rs-bio-134", "en:rs-bio-139", "en:rs-bio-140", "en:rs-bio-141", "en:rs-bio-149", "en:rs-bio-151", "en:rs-bio-154", "en:rs-bio-161", "en:rs-bio-162", "en:rs-bio-171", "en:rs-bio-179", "en:ru-bio-102", "en:ru-bio-108", "en:ru-bio-110", "en:ru-bio-112", "en:ru-bio-115", "en:ru-bio-122", "en:ru-bio-134", "en:ru-bio-135", "en:ru-bio-140", "en:ru-bio-141", "en:ru-bio-151", "en:ru-bio-152", "en:ru-bio-154", "en:ru-bio-161", "en:ru-bio-170", "en:ru-bio-171", "en:ru-bio-173", "en:ru-bio-177", "en:rw-bio-128", "en:rw-bio-140", "en:rw-bio-143", "en:rw-bio-149", "en:rw-bio-154", "en:rw-bio-171", "en:sa-bio-102", "en:sa-bio-140", "en:sa-bio-141", "en:sa-bio-143", "en:sa-bio-152", "en:sa-bio-154", "en:sa-bio-171", "en:sa-bio-179", "en:sb-bio-119", "en:sb-bio-151", "en:sc-bio-102", "en:sc-bio-141", "en:sc-bio-154", "en:sd-bio-141", "en:sd-bio-143", "en:sd-bio-149", "en:sd-bio-154", "en:sd-bio-171", "en:sd-bio-177", "en:se-eko-01", "en:se-eko-03", "en:se-eko-04", "en:se-eko-05", "en:se-eko-07", "en:se-eko-08", "en:sg-bio-107", "en:sg-bio-119", "en:sg-bio-132", "en:sg-bio-140", "en:sg-bio-141", "en:sg-bio-142", "en:sg-bio-143", "en:sg-bio-149", "en:sg-bio-152", "en:sg-bio-154", "en:sg-bio-174", "en:si-eko-001", "en:si-eko-002", "en:si-eko-003", "en:si-eko-004", "en:sk-bio-002", "en:sk-bio-003", "en:sl-bio-140", "en:sl-bio-143", "en:sl-bio-149", "en:sl-bio-154", "en:sm-bio-102", "en:sm-bio-115", "en:sm-bio-132", "en:sm-bio-150", "en:sn-bio-102", "en:sn-bio-115", "en:sn-bio-128", "en:sn-bio-132", "en:sn-bio-134", "en:sn-bio-140", "en:sn-bio-141", "en:sn-bio-149", "en:sn-bio-150", "en:sn-bio-151", "en:sn-bio-154", "en:sn-bio-160", "en:sn-bio-161", "en:so-bio-140", "en:so-bio-149", "en:so-bio-154", "en:sr-bio-123", "en:sr-bio-143", "en:sr-bio-149", "en:sr-bio-151", "en:sr-bio-154", "en:ss-bio-149", "en:ss-bio-154", "en:st-bio-149", "en:st-bio-154", "en:st-bio-172", "en:sv-bio-104", "en:sv-bio-118", "en:sv-bio-120", "en:sv-bio-123", "en:sv-bio-135", "en:sv-bio-140", "en:sv-bio-141", 
"en:sv-bio-143", "en:sv-bio-144", "en:sv-bio-149", "en:sv-bio-151", "en:sv-bio-154", "en:sv-bio-169", "en:sv-bio-178", "en:sy-bio-102", "en:sy-bio-115", "en:sy-bio-143", "en:sy-bio-149", "en:sy-bio-154", "en:sz-bio-134", "en:sz-bio-141", "en:sz-bio-149", "en:sz-bio-154", "en:td-bio-141", "en:td-bio-149", "en:td-bio-154", "en:td-bio-161", "en:tg-bio-102", "en:tg-bio-128", "en:tg-bio-132", "en:tg-bio-134", "en:tg-bio-140", "en:tg-bio-143", "en:tg-bio-149", "en:tg-bio-151", "en:tg-bio-152", "en:tg-bio-154", "en:tg-bio-177", "en:th-bio-102", "en:th-bio-107", "en:th-bio-115", "en:th-bio-121", "en:th-bio-132", "en:th-bio-140", "en:th-bio-141", "en:th-bio-142", "en:th-bio-143", "en:th-bio-147", "en:th-bio-149", "en:th-bio-151", "en:th-bio-152", "en:th-bio-154", "en:th-bio-171", "en:th-bio-174", "en:th-bio-177", "en:tj-bio-102", "en:tj-bio-108", "en:tj-bio-112", "en:tj-bio-135", "en:tj-bio-140", "en:tj-bio-141", "en:tj-bio-149", "en:tj-bio-154", "en:tj-bio-161", "en:tj-bio-170", "en:tj-bio-173", "en:tl-bio-119", "en:tl-bio-140", "en:tl-bio-149", "en:tl-bio-154", "en:tm-bio-102", "en:tm-bio-112", "en:tm-bio-135", "en:tm-bio-141", "en:tm-bio-149", "en:tm-bio-151", "en:tm-bio-154", "en:tn-bio-102", "en:tn-bio-149", "en:tn-bio-154", "en:tn-bio-161", "en:to-bio-107", "en:to-bio-119", "en:to-bio-149", "en:to-bio-151", "en:tr-bio-102", "en:tr-bio-115", "en:tr-bio-132", "en:tr-bio-134", "en:tr-bio-135", "en:tr-bio-139", "en:tr-bio-140", "en:tr-bio-141", "en:tr-bio-144", "en:tr-bio-154", "en:tr-bio-161", "en:tr-bio-166", "en:tr-bio-171", "en:tr-bio-175", "en:tr-bio-178", "en:tr-bio-179", "en:tt-bio-123", "en:tv-bio-149", "en:tv-bio-151", "en:tw-bio-107", "en:tw-bio-140", "en:tw-bio-141", "en:tw-bio-143", "en:tw-bio-144", "en:tw-bio-145", "en:tw-bio-149", "en:tw-bio-154", "en:tw-bio-171", "en:tw-bio-174", "en:tz-bio-128", "en:tz-bio-134", "en:tz-bio-140", "en:tz-bio-141", "en:tz-bio-143", "en:tz-bio-149", "en:tz-bio-151", "en:tz-bio-152", "en:tz-bio-154", "en:tz-bio-161", 
"en:tz-bio-171", "en:tz-bio-177", "en:ua-bio-102", "en:ua-bio-108", "en:ua-bio-112", "en:ua-bio-115", "en:ua-bio-132", "en:ua-bio-134", "en:ua-bio-135", "en:ua-bio-139", "en:ua-bio-140", "en:ua-bio-141", "en:ua-bio-149", "en:ua-bio-150", "en:ua-bio-151", "en:ua-bio-154", "en:ua-bio-161", "en:ua-bio-171", "en:ua-bio-173", "en:ua-bio-177", "en:ug-bio-102", "en:ug-bio-128", "en:ug-bio-134", "en:ug-bio-140", "en:ug-bio-141", "en:ug-bio-142", "en:ug-bio-143", "en:ug-bio-149", "en:ug-bio-151", "en:ug-bio-152", "en:ug-bio-154", "en:ug-bio-171", "en:ug-bio-177", "en:us-bio-140", "en:us-bio-141", "en:us-bio-143", "en:us-bio-144", "en:us-bio-149", "en:uy-bio-110", "en:uy-bio-115", "en:uy-bio-123", "en:uy-bio-132", "en:uy-bio-135", "en:uy-bio-140", "en:uy-bio-141", "en:uy-bio-149", "en:uy-bio-151", "en:uy-bio-154", "en:uz-bio-102", "en:uz-bio-108", "en:uz-bio-112", "en:uz-bio-115", "en:uz-bio-135", "en:uz-bio-140", "en:uz-bio-141", "en:uz-bio-149", "en:uz-bio-151", "en:uz-bio-154", "en:uz-bio-161", "en:uz-bio-171", "en:uz-bio-173", "en:ve-bio-118", "en:ve-bio-123", "en:ve-bio-140", "en:ve-bio-141", "en:ve-bio-142", "en:ve-bio-143", "en:ve-bio-149", "en:ve-bio-151", "en:ve-bio-154", "en:vn-bio-102", "en:vn-bio-115", "en:vn-bio-121", "en:vn-bio-132", "en:vn-bio-140", "en:vn-bio-141", "en:vn-bio-142", "en:vn-bio-143", "en:vn-bio-144", "en:vn-bio-147", "en:vn-bio-149", "en:vn-bio-151", "en:vn-bio-152", "en:vn-bio-154", "en:vn-bio-161", "en:vn-bio-171", "en:vn-bio-174", "en:vn-bio-177", "en:vu-bio-107", "en:vu-bio-130", "en:vu-bio-154", "en:ws-bio-119", "en:ws-bio-130", "en:ws-bio-140", "en:ws-bio-142", "en:ws-bio-151", "en:ws-bio-152", "en:ws-bio-154", "en:xk-bio-101", "en:xk-bio-139", "en:xk-bio-141", "en:xk-bio-149", "en:xk-bio-154", "en:xk-bio-161", "en:xk-bio-171", "en:xk-bio-179", "en:za-bio-102", "en:za-bio-134", "en:za-bio-140", "en:za-bio-141", "en:za-bio-142", "en:za-bio-143", "en:za-bio-144", "en:za-bio-149", "en:za-bio-151", "en:za-bio-154", "en:za-bio-161", 
"en:za-bio-171", "en:zm-bio-141", "en:zm-bio-149", "en:zm-bio-154", "en:zw-bio-134", "en:zw-bio-140", "en:zw-bio-149", "en:zw-bio-154", "fr:ab-agriculture-biologique", "hr:ekolo\u0161kog-proizvoda", "lt:ekologinis-\u017eem\u0117s-\u016bkis"], "name": {"ro": "Organic UE", "lt": "ES ekologi\u0161kas", "it": "Biologico UE", "he": "\u05d0\u05d5\u05e8\u05d2\u05e0\u05d9 \u05d1\u05ea\u05e7\u05df \u05d0\u05d9\u05e8\u05d5\u05e4\u05d0\u05d9", "tr": "AB Organik", "hr": "EU Organic", "fr": "Bio europ\u00e9en", "en": "EU Organic", "es": "Ecol\u00f3gico UE", "ca": "Org\u00e0nic UE", "da": "EU-\u00f8kologisk", "pl": "EU Organic", "pt": "Org\u00e2nico EU", "ko": "EU \uc720\uae30\ub18d", "el": "\u0392\u03b9\u03bf\u03bb\u03bf\u03b3\u03b9\u03ba\u03cc\u03c2 \u0395\u0395", "et": "EL-\u00f6koloogiline", "bg": "\u0415\u0421 \u0411\u0438\u043e", "vi": "H\u1eefu c\u01a1 EU", "sv": "EU-ekologisk", "cs": "EU bio", "ru": "\u0415\u0421 \u041e\u0440\u0433\u0430\u043d\u0438\u0447\u0435\u0441\u043a\u0438\u0439", "hu": "EU Organic", "zh": "\u6b27\u76df\u6709\u673a", "nl": "EU Organic", "uk": "\u0404\u0421 \u041e\u0440\u0433\u0430\u043d\u0456\u0447\u043d\u0438\u0439", "fi": "EU Luomu", "no": "EU-\u00f8kologisk", "ja": "EU\u30aa\u30fc\u30ac\u30cb\u30c3\u30af", "de": "EU-\u00d6ko-Verordnung"}, "synonyms": {"nl": ["EU Organic"], "zh": ["\u6b27\u76df\u6709\u673a"], "hu": ["EU Organic", "EU bio", "EU \u00f6ko"], "sv": ["EU-ekologisk"], "ru": ["\u0415\u0421 \u041e\u0440\u0433\u0430\u043d\u0438\u0447\u0435\u0441\u043a\u0438\u0439", "\u0415\u0432\u0440\u043e\u043f\u0435\u0439\u0441\u043a\u0438\u0439 \u043e\u0440\u0433\u0430\u043d\u0438\u0447\u0435\u0441\u043a\u0438\u0439", "\u0415\u0432\u0440\u043e\u043f\u0435\u0439\u0441\u043a\u0438\u0439 \u043b\u0438\u0441\u0442", "\u0415\u0432\u0440\u043e\u043f\u0435\u0439\u0441\u043a\u0438\u0439 \u043e\u0440\u0433\u0430\u043d\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u043b\u0438\u0441\u0442", "\u0415\u0421 
\u043e\u0440\u0433\u0430\u043d\u0438\u0447\u0435\u0441\u043a\u043e\u0435 \u0437\u0435\u043c\u043b\u0435\u0434\u0435\u043b\u0438\u0435"], "cs": ["EU bio"], "vi": ["H\u1eefu c\u01a1 EU", "H\u1eefu c\u01a1 Ch\u00e2u \u00c2u", "L\u00e1 Ch\u00e2u \u00c2u", "L\u00e1 h\u1eefu c\u01a1 Ch\u00e2u \u00c2u", "N\u00f4ng nghi\u1ec7p h\u1eefu c\u01a1 EU"], "de": ["EU-\u00d6ko-Verordnung", "Europ\u00e4ische \u00d6ko-Verordnung", "EU \u00d6kologische"], "ja": ["EU\u30aa\u30fc\u30ac\u30cb\u30c3\u30af", "\u30e8\u30fc\u30ed\u30c3\u30d1\u30aa\u30fc\u30ac\u30cb\u30c3\u30af", "\u30e8\u30fc\u30ed\u30c3\u30d1\u306e\u8449", "\u30e8\u30fc\u30ed\u30c3\u30d1\u306e\u30aa\u30fc\u30ac\u30cb\u30c3\u30af\u8449", "EU\u30aa\u30fc\u30ac\u30cb\u30c3\u30af\u8fb2\u696d"], "uk": ["\u0404\u0421 \u041e\u0440\u0433\u0430\u043d\u0456\u0447\u043d\u0438\u0439", "\u0404\u0432\u0440\u043e\u043f\u0435\u0439\u0441\u044c\u043a\u0438\u0439 \u043e\u0440\u0433\u0430\u043d\u0456\u0447\u043d\u0438\u0439", "\u0404\u0432\u0440\u043e\u043f\u0435\u0439\u0441\u044c\u043a\u0438\u0439 \u043b\u0438\u0441\u0442", "\u0404\u0432\u0440\u043e\u043f\u0435\u0439\u0441\u044c\u043a\u0438\u0439 \u043e\u0440\u0433\u0430\u043d\u0456\u0447\u043d\u0438\u0439 \u043b\u0438\u0441\u0442", "\u0404\u0421 \u043e\u0440\u0433\u0430\u043d\u0456\u0447\u043d\u0435 \u0441\u0456\u043b\u044c\u0441\u044c\u043a\u0435 \u0433\u043e\u0441\u043f\u043e\u0434\u0430\u0440\u0441\u0442\u0432\u043e"], "no": ["EU-\u00f8kologisk", "Europeisk \u00f8kologisk", "Europeisk blad", "Europeisk \u00f8kologisk blad", "EU \u00f8kologisk landbruk"], "fi": ["EU Luomu", "EU:n luomu", "Euroopan luomu", "Euroopan lehti", "Euroopan luomulehti"], "ca": ["Org\u00e0nic UE"], "es": ["Ecol\u00f3gico UE", "Ecol\u00f3gico U.E.", "Producto ecol\u00f3gico europeo"], "fr": ["Bio europ\u00e9en", "Label bio europ\u00e9en", "bio ce", "bio europe", "Eurofeuille", "Euro-Feuille", "Feuille bio"], "en": ["EU Organic", "European Organic", "European leaf", "European organic leaf", "EU organic farming"], 
"hr": ["EU Organic"], "it": ["Biologico UE"], "lt": ["ES ekologi\u0161kas", "Europos ekologi\u0161kas", "europinis lapas", "Europos ekologi\u0161kas lapas", "ES ekologinis \u016bkininkavimas"], "ro": ["Organic UE", "Organic european", "Frunz\u0103 european\u0103", "Frunz\u0103 organic\u0103 european\u0103", "Agricultur\u0103 organic\u0103 UE"], "tr": ["AB Organik", "Avrupa Organik", "Avrupa yapra\u011f\u0131", "Avrupa organik yapra\u011f\u0131", "AB organik tar\u0131m"], "he": ["\u05d0\u05d5\u05e8\u05d2\u05e0\u05d9 \u05d1\u05ea\u05e7\u05df \u05d0\u05d9\u05e8\u05d5\u05e4\u05d0\u05d9"], "et": ["EL-\u00f6koloogiline", "Euroopa \u00f6koloogiline", "Euroopa leht", "Euroopa \u00f6koloogiline leht", "EL \u00f6koloogiline p\u00f5llumajandus"], "el": ["\u0392\u03b9\u03bf\u03bb\u03bf\u03b3\u03b9\u03ba\u03cc\u03c2 \u0395\u0395", "\u0395\u03c5\u03c1\u03c9\u03c0\u03b1\u03ca\u03ba\u03cc\u03c2 \u0392\u03b9\u03bf\u03bb\u03bf\u03b3\u03b9\u03ba\u03cc\u03c2", "\u0395\u03c5\u03c1\u03c9\u03c0\u03b1\u03ca\u03ba\u03cc \u03c6\u03cd\u03bb\u03bb\u03bf", "\u0395\u03c5\u03c1\u03c9\u03c0\u03b1\u03ca\u03ba\u03cc \u03b2\u03b9\u03bf\u03bb\u03bf\u03b3\u03b9\u03ba\u03cc \u03c6\u03cd\u03bb\u03bb\u03bf", "\u0395\u0395 \u03b2\u03b9\u03bf\u03bb\u03bf\u03b3\u03b9\u03ba\u03ae \u03b3\u03b5\u03c9\u03c1\u03b3\u03af\u03b1"], "bg": ["\u0415\u0421 \u0411\u0438\u043e", "\u0415\u0432\u0440\u043e\u043f\u0435\u0439\u0441\u043a\u043e \u0431\u0438\u043e", "\u0415\u0432\u0440\u043e\u043f\u0435\u0439\u0441\u043a\u043e \u043b\u043e\u0433\u043e", "\u0415\u0432\u0440\u043e\u043f\u0435\u0439\u0441\u043a\u043e \u0431\u0438\u043e \u043b\u043e\u0433\u043e", "\u0415\u0421 \u0431\u0438\u043e \u0437\u0435\u043c\u0435\u0434\u0435\u043b\u0438\u0435"], "ko": ["EU \uc720\uae30\ub18d", "\uc720\ub7fd \uc720\uae30\ub18d", "\uc720\ub7fd \uc78e", "\uc720\ub7fd \uc720\uae30\ub18d \uc78e", "EU \uc720\uae30\ub18d \ub18d\uc5c5"], "da": ["EU-\u00f8kologisk"], "pt": ["Org\u00e2nico EU", "Org\u00e2nico Europeu", "Folha Europeia", "Folha 
Org\u00e2nica Europeia"], "pl": ["EU Organic", "Europejski Organic", "Europejski li\u015b\u0107", "Europejski organiczny li\u015b\u0107", "EU organiczne rolnictwo"]}, "parents": ["en:organic"]}, "en:eu-agriculture": {"children": ["en:eu-non-eu-agriculture", "en:french-agriculture"], "name": {"fi": "Tuotettu EU:ssa", "pt": "Agricultura UE", "de": "EU-Landwirtschaft", "bg": "\u0417\u0435\u043c\u0435\u0434\u0435\u043b\u0438\u0435 \u043e\u0442 \u0415\u0421", "hr": "EU poljoprivreda", "ru": "EU \u0430\u0433\u0440\u043e\u043a\u0443\u043b\u044c\u0442\u0443\u0440\u0430", "cs": "Zem\u011bd\u011blstv\u00ed EU", "it": "Agricoltura dell'Unione Europea", "lt": "ES \u017eem\u0117s \u016bkis", "he": "\u05d7\u05e7\u05dc\u05d0\u05d5\u05ea \u05d4\u05d0\u05d9\u05d7\u05d5\u05d3 \u05d4\u05d0\u05d9\u05e8\u05d5\u05e4\u05d9", "es": "Agricultura UE", "ca": "Agricultura UE", "nl": "EU landbouw", "fr": "Agriculture UE", "hu": "EU mez\u0151gazdas\u00e1g", "en": "EU Agriculture"}, "synonyms": {"fi": ["tuotettu EU:ssa"], "pt": ["Agricultura UE", "Agricultura da Uni\u00e3o Europeia"], "bg": ["\u0417\u0435\u043c\u0435\u0434\u0435\u043b\u0438\u0435 \u043e\u0442 \u0415\u0421"], "de": ["EU-Landwirtschaft"], "he": ["\u05d7\u05e7\u05dc\u05d0\u05d5\u05ea \u05d4\u05d0\u05d9\u05d7\u05d5\u05d3 \u05d4\u05d0\u05d9\u05e8\u05d5\u05e4\u05d9"], "lt": ["ES \u017eem\u0117s \u016bkis", "Europos S\u0105jungos \u017eem\u0117s \u016bkis"], "it": ["Agricoltura dell'Unione Europea"], "cs": ["Zem\u011bd\u011blstv\u00ed EU"], "ru": ["EU \u0430\u0433\u0440\u043e\u043a\u0443\u043b\u044c\u0442\u0443\u0440\u0430", "\u0430\u0433\u0440\u043e\u043a\u0443\u043b\u044c\u0442\u0443\u0440\u0430 \u0415\u0432\u0440\u043e\u043f\u0435\u0439\u0441\u043a\u043e\u0433\u043e \u0421\u043e\u044e\u0437\u0430"], "hr": ["EU poljoprivreda"], "en": ["EU Agriculture", "European Union agriculture"], "hu": ["EU mez\u0151gazdas\u00e1g"], "fr": ["Agriculture UE", "Agriculture Union Europ\u00e9eenne"], "nl": ["EU landbouw"], "es": ["Agricultura UE", 
"Agricultura Uni\u00f3n Europea", "Agricultura de la UE"], "ca": ["Agricultura UE"]}}, "en:fairtrade-international": {"parents": ["en:fair-trade"], "synonyms": {"ru": ["\u0427\u0435\u0441\u0442\u043d\u0430\u044f \u0442\u043e\u0440\u0433\u043e\u0432\u043b\u044f", "FLO"], "xx": ["Fairtrade International"], "fi": ["Fairtrade International", "FLO"], "pt": ["Fairtrade International"], "it": ["Commercio Equo Internazionale"], "nl": ["Fairtrade International", "Eerlijke handel"], "es": ["Fairtrade International", "Fairtrade", "FLO international", "FLO"], "de": ["Fairtrade International", "Fairtrade", "FLO international", "FLO"], "ca": ["Fairtrade Internacional", "FLO International"], "en": ["Fairtrade International", "Fairtrade", "FLO international", "FLO"], "bg": ["Fairtrade International", "Fairtrade"], "fr": ["Fairtrade International", "Fairtrade", "FLO international", "FLO"]}, "wikipedia": {"en": "https://en.wikipedia.org/wiki/Fairtrade_certification"}, "name": {"en": "Fairtrade International", "bg": "Fairtrade International", "fr": "Fairtrade International", "nl": "Fairtrade International", "es": "Fairtrade International", "de": "Fairtrade International", "ca": "Fairtrade Internacional", "fi": "Fairtrade International", "pt": "Fairtrade International", "it": "Commercio Equo Internazionale", "ru": "\u0427\u0435\u0441\u0442\u043d\u0430\u044f \u0442\u043e\u0440\u0433\u043e\u0432\u043b\u044f", "xx": "Fairtrade International"}, "children": ["en:fairtrade-australia-and-new-zealand", "en:fairtrade-austria", "en:fairtrade-belgium", "en:fairtrade-canada", "en:fairtrade-india", "en:fairtrade-italia", "en:fairtrade-korea", "en:fairtrade-mark-ireland", "en:fairtrade-norway", "en:fairtrade-poland", "en:fairtrade-sweden", "en:fairtrade-taiwan", "en:max-havelaar", "en:max-havelaar-denmark", "en:the-czech-fairtrade-association", "en:the-fairtrade-foundation", "en:transfair-germany", "en:transfair-japan", "en:transfair-minka-luxembourg", 
"es:asociacion-del-sello-de-productos-de-comercio-justo"]}} \ No newline at end of file diff --git a/tests/int/data/test_off.yml b/tests/int/data/test_off.yml new file mode 100644 index 00000000..3a17f8ef --- /dev/null +++ b/tests/int/data/test_off.yml @@ -0,0 +1,67 @@ +# This is a small configuration for integration tests using OFF like data +indices: + "test_off": + index: + id_field_name: code + last_modified_field_name: last_modified_t + name: test_off + number_of_replicas: 1 + number_of_shards: 1 + fields: + code: + required: true + type: keyword + product_name: + full_text_search: true + type: text_lang + categories: + full_text_search: true + input_field: categories_tags + taxonomy_name: categories + type: taxonomy + bucket_agg: true + labels: + full_text_search: true + input_field: labels_tags + taxonomy_name: labels + type: taxonomy + bucket_agg: true + unique_scans_n: + type: integer + nova_groups: + type: keyword + bucket_agg: true + last_modified_t: + type: date + created_t: + type: date + nutriments: + type: object + completeness: + type: float + lang_separator: _ + match_phrase_boost: 2.0 + # todo ? 
+ preprocessor: tests.int.helpers.TestDocumentPreprocessor + document_fetcher: tests.int.helpers.TestDocumentFetcher + result_processor: tests.int.helpers.TestResultProcessor + split_separator: ',' + redis_stream_name: product_updates_off + primary_color: "#341100" + accent_color: "#ff8714" + taxonomy: + sources: + - name: categories + url: file:///opt/search/tests/int/data/test_categories.full.json + - name: labels + url: file:///opt/search/tests/int/data/test_labels.full.json + index: + number_of_replicas: 1 + number_of_shards: 4 + name: test_off_taxonomy + supported_langs: + # a specific language to put the main language entry + - main + - en + - fr +default_index: "test_off" diff --git a/tests/int/data/test_off_data.json b/tests/int/data/test_off_data.json new file mode 100644 index 00000000..236ffc9e --- /dev/null +++ b/tests/int/data/test_off_data.json @@ -0,0 +1,284 @@ +[ + { + "code": "20889463", + "categories": [ + "en:sweeteners", + "en:sugars", + "en:granulated-sugars" + ], + "labels": [ + "en:no-lactose" + ], + "unique_scans_n": 15, + "nova_groups": "2", + "last_modified_t": 1700525044, + "created_t": 1537090000, + "nutriments": { + "fat_100g": 0, + "energy-kcal_100g": 400, + "proteins_100g": 0, + "saturated-fat_100g": 0, + "salt_100g": 0, + "carbohydrates_100g": 100, + "sugars_100g": 100, + "sodium_100g": 0 + }, + "completeness": 0.5874999999999999, + "product_name": "Granulated sugar" + }, + { + "code": "3048665713221", + "categories": [ + "en:plant-based-foods-and-beverages", + "en:plant-based-foods", + "en:fruits-and-vegetables-based-foods", + "en:vegetables-based-foods", + "en:dried-products", + "en:dried-plant-based-foods", + "en:tomatoes-and-their-products", + "en:dried-vegetables", + "en:dried-tomatoes", + "fr:tortellini-pesto-rosso" + ], + "labels": [ + "en:no-artificial-flavors", + "en:no-preservatives", + "en:nutriscore", + "en:nutriscore-grade-a" + ], + "unique_scans_n": 14, + "last_modified_t": 1682531455, + "created_t": 1651170000, + 
"nutriments": { + "carbohydrates_100g": 41, + "proteins_100g": 11, + "saturated-fat_100g": 2.6, + "sugars_100g": 6, + "fat_100g": 10, + "energy-kj_100g": 1303, + "energy-kcal_100g": 310, + "fiber_100g": 6.1, + "salt_100g": 0.65, + "sodium_100g": 0.26 + }, + "completeness": 0.6499999999999999, + "product_name": "Tortellini pesto Rosso" + }, + { + "code": "3661344653573", + "categories": [ + "en:dairies", + "en:desserts", + "en:dairy-desserts", + "en:creamy-puddings", + "en:coffee-creamy-puddings" + ], + "labels": [ + "en:fair-trade", + "en:organic", + "en:eu-organic", + "en:fairtrade-international", + "en:eu-agriculture", + "en:fr-bio-01", + "en:max-havelaar", + "en:nutriscore", + "en:nutriscore-grade-c", + "fr:ab-agriculture-biologique" + ], + "unique_scans_n": 13, + "nova_groups": "3", + "last_modified_t": 1677189702, + "created_t": 1601970000, + "nutriments": { + "proteins_100g": 3.9, + "carbohydrates_100g": 19.2, + "fat_100g": 5, + "saturated-fat_100g": 3.5, + "sugars_100g": 15, + "energy-kcal_100g": 137, + "sodium_100g": 0.16, + "salt_100g": 0.4 + }, + "completeness": 0.7874999999999999, + "product_name": "Yaourt Crémeuh Café" + }, + { + "code": "3256229523999", + "categories": [ + "en:snacks", + "en:sweet-snacks", + "en:biscuits-and-cakes", + "en:biscuits", + "en:chocolate-biscuits", + "en:shortbread-cookies", + "en:digestives-covered-with-chocolate" + ], + "labels": [ + "en:organic", + "en:eu-organic", + "en:non-eu-agriculture", + "en:eu-agriculture", + "en:eu-non-eu-agriculture", + "en:fr-bio-01", + "en:nutriscore", + "en:nutriscore-grade-d", + "fr:ab-agriculture-biologique" + ], + "unique_scans_n": 12, + "nova_groups": "3", + "last_modified_t": 1701050992, + "created_t": 1678120000, + "nutriments": { + "energy-kcal_100g": 505, + "fiber_100g": 6, + "salt_100g": 0.65, + "sodium_100g": 0.26, + "carbohydrates_100g": 57, + "proteins_100g": 7.8, + "sugars_100g": 22, + "saturated-fat_100g": 13, + "fat_100g": 26 + }, + "completeness": 0.575, + "product_name": 
"Sablés aux céréales nappés chocolat noir" + }, + { + "code": "3173990027337", + "unique_scans_n": 7, + "last_modified_t": 1596539329, + "created_t": 1560770000, + "nutriments": { + "energy-kcal_100g": 455, + "fat_100g": 15, + "saturated-fat_100g": 1.6, + "proteins_100g": 5.7, + "salt_100g": 0.43, + "sugars_100g": 34, + "carbohydrates_100g": 70, + "sodium_100g": 0.172 + }, + "completeness": 0.275, + "product_name": "FOURRÉS MYRTILLES" + }, + { + "code": "6111262384327", + "unique_scans_n": 7, + "last_modified_t": 1672101522, + "created_t": 1659440000, + "nutriments": { + "carbohydrates_100g": 59.23, + "saturated-fat_100g": 20, + "salt_100g": 0.53, + "sodium_100g": 0.21200000000000002, + "energy-kcal_100g": 553, + "proteins_100g": 5.92, + "fiber_100g": 0, + "sugars_100g": 35.7, + "fat_100g": 32.58 + }, + "completeness": 0.275, + "product_name": "Tobigo" + }, + { + "code": "29161690", + "unique_scans_n": 5, + "last_modified_t": 1710665613, + "created_t": 1633290000, + "nutriments": { + "sugars_100g": 3.7, + "fiber_100g": 11, + "saturated-fat_100g": 4.5, + "proteins_100g": 21.2, + "energy-kcal_100g": 613, + "carbohydrates_100g": 12.6, + "fat_100g": 50.6 + }, + "completeness": 0.475, + "product_name": "100 % Almond Buter" + }, + { + "code": "3256220513173", + "categories": [ + "en:beverages", + "en:syrups", + "en:sweetened-beverages" + ], + "labels": [ + "en:no-preservatives", + "en:green-dot", + "en:transformed-in-france" + ], + "unique_scans_n": 5, + "nova_groups": "4", + "last_modified_t": 1644531894, + "created_t": 1475310000, + "nutriments": { + "energy-kcal_100g": 330, + "proteins_100g": 0.1, + "carbohydrates_100g": 80, + "sugars_100g": 79, + "fat_100g": 0.1, + "fiber_100g": 0.5, + "saturated-fat_100g": 0.1, + "salt_100g": 0.01, + "energy-kj_100g": 1402, + "sodium_100g": 0.004 + }, + "completeness": 0.8874999999999998, + "product_name": "Sirop de thé pêche" + }, + { + "code": "0025000044984", + "categories": [ + "en:beverages", + "en:carbonated-drinks", + 
"en:sodas", + "en:lemonade", + "en:sweetened-beverages" + ], + "labels": [ + "en:no-added-sugar" + ], + "unique_scans_n": 4, + "nova_groups": "4", + "last_modified_t": 1693247412, + "created_t": 1533230000, + "nutriments": { + "proteins_100g": 0.83333333333333, + "energy-kcal_100g": 45.833333333333, + "sugars_100g": 9.5833333333333, + "carbohydrates_100g": 10.833333333333, + "salt_100g": 0, + "saturated-fat_100g": 0, + "fiber_100g": 0, + "fat_100g": 0, + "sodium_100g": 0 + }, + "completeness": 0.475, + "product_name": "Simply lemonade" + }, + { + "code": "4056489647744", + "categories": [ + "en:crumpets" + ], + "unique_scans_n": 4, + "nova_groups": "4", + "last_modified_t": 1710268814, + "created_t": 1660840000, + "nutriments": { + "energy-kj_100g": 801, + "carbohydrates_100g": 38.4, + "fat_100g": 0.7, + "salt_100g": 0.94, + "energy-kcal_100g": 189, + "saturated-fat_100g": 0.1, + "sugars_100g": 2.7, + "fiber_100g": 2.5, + "sodium_100g": 0.376, + "proteins_100g": 6 + }, + "completeness": 0.6874999999999999, + "product_name": "Crumpets" + } + ] \ No newline at end of file diff --git a/tests/int/data/test_off_data.jsonl b/tests/int/data/test_off_data.jsonl new file mode 100644 index 00000000..31f869ba --- /dev/null +++ b/tests/int/data/test_off_data.jsonl @@ -0,0 +1,9 @@ +{"code": "30123457678901", "categories_tags": ["en:sweeteners", "en:sugars", "en:granulated-sugars"], "labels_tags": ["en:no-lactose"], "unique_scans_n": 15, "nova_groups": "2", "last_modified_t": 1700525044, "created_t": 1537090000, "nutriments": {"fat_100g": 0, "energy-kcal_100g": 400, "proteins_100g": 0, "saturated-fat_100g": 0, "salt_100g": 0, "carbohydrates_100g": 100, "sugars_100g": 100, "sodium_100g": 0}, "completeness": 0.5874999999999999, "product_name": "Granulated sugar", "product_name_en": "Granulated sugar", "product_name_fr": "Sucre semoule"} +{"code": "30123457678902", "categories_tags": ["en:plant-based-foods-and-beverages", "en:plant-based-foods", 
"en:fruits-and-vegetables-based-foods", "en:vegetables-based-foods", "en:dried-products", "en:dried-plant-based-foods", "en:tomatoes-and-their-products", "en:dried-vegetables", "en:dried-tomatoes", "fr:tortellini-pesto-rosso"], "labels_tags": ["en:no-artificial-flavors", "en:no-preservatives", "en:nutriscore", "en:nutriscore-grade-a"], "unique_scans_n": 14, "last_modified_t": 1682531455, "created_t": 1651170000, "nutriments": {"carbohydrates_100g": 41, "proteins_100g": 11, "saturated-fat_100g": 2.6, "sugars_100g": 6, "fat_100g": 10, "energy-kj_100g": 1303, "energy-kcal_100g": 310, "fiber_100g": 6.1, "salt_100g": 0.65, "sodium_100g": 0.26}, "completeness": 0.6499999999999999, "product_name": "Tortellini pesto Rosso", "product_name_it": "Tortellini pesto Rosso"} +{"code": "30123457678903", "categories_tags": ["en:dairies", "en:desserts", "en:dairy-desserts", "en:creamy-puddings", "en:coffee-creamy-puddings"], "labels_tags": ["en:fair-trade", "en:organic", "en:eu-organic", "en:fairtrade-international", "en:eu-agriculture", "en:fr-bio-01", "en:max-havelaar", "en:nutriscore", "en:nutriscore-grade-c", "fr:ab-agriculture-biologique"], "unique_scans_n": 13, "nova_groups": "3", "last_modified_t": 1677189702, "created_t": 1601970000, "nutriments": {"proteins_100g": 3.9, "carbohydrates_100g": 19.2, "fat_100g": 5, "saturated-fat_100g": 3.5, "sugars_100g": 15, "energy-kcal_100g": 137, "sodium_100g": 0.16, "salt_100g": 0.4}, "completeness": 0.7874999999999999, "product_name_fr": "Yaourt Cr\u00e9meuh Caf\u00e9", "product_name": "Yaourt Cr\u00e9meuh Caf\u00e9"} +{"code": "30123457678904", "categories_tags": ["en:dairies", "en:desserts", "en:dairy-desserts", "en:creamy-puddings"], "labels_tags": ["en:fair-trade", "en:nutriscore", "en:nutriscore-grade-c"], "unique_scans_n": 13, "nova_groups": "3", "last_modified_t": 1677189702, "created_t": 1601970000, "nutriments": {"proteins_100g": 3.9, "carbohydrates_100g": 19.2, "fat_100g": 5, "saturated-fat_100g": 3.5, "sugars_100g": 15, 
"energy-kcal_100g": 137, "sodium_100g": 0.16, "salt_100g": 0.4}, "completeness": 0.7874999999999999, "product_name": "Yaourt Cr\u00e9meuh Nature", "product_name_fr": "Yaourt Cr\u00e9meuh Nature"} +{"code": "30123457678905", "categories_tags": ["en:snacks", "en:sweet-snacks", "en:biscuits-and-cakes", "en:biscuits", "en:chocolate-biscuits", "en:shortbread-cookies", "en:digestives-covered-with-chocolate"], "labels_tags": ["en:organic", "en:eu-organic", "en:non-eu-agriculture", "en:eu-agriculture", "en:eu-non-eu-agriculture", "en:fr-bio-01", "en:nutriscore", "en:nutriscore-grade-d", "fr:ab-agriculture-biologique"], "unique_scans_n": 12, "nova_groups": "3", "last_modified_t": 1701050992, "created_t": 1678120000, "nutriments": {"energy-kcal_100g": 505, "fiber_100g": 6, "salt_100g": 0.65, "sodium_100g": 0.26, "carbohydrates_100g": 57, "proteins_100g": 7.8, "sugars_100g": 22, "saturated-fat_100g": 13, "fat_100g": 26}, "completeness": 0.575, "product_name": "Sabl\u00e9s aux c\u00e9r\u00e9ales napp\u00e9s chocolat noir", "product_name_fr": "Sabl\u00e9s aux c\u00e9r\u00e9ales napp\u00e9s chocolat noir"} +{"code": "30123457678906", "categories_tags": ["en:snacks", "en:sweet-snacks", "en:biscuits-and-cakes", "en:biscuits", "en:chocolate-biscuits"], "labels_tags": ["en:organic", "en:eu-organic"], "unique_scans_n": 12, "nova_groups": "4", "last_modified_t": 1701050992, "created_t": 1678120000, "nutriments": {"energy-kcal_100g": 505, "fiber_100g": 6, "salt_100g": 0.65, "sodium_100g": 0.26, "carbohydrates_100g": 57, "proteins_100g": 7.8, "sugars_100g": 22, "saturated-fat_100g": 13, "fat_100g": 26}, "completeness": 0.575, "product_name_fr": "Sabl\u00e9s de chocolat", "product_name_en": "Chocolate shortbread", "product_name": "Chocolate shortbread"} +{"code": "30123457678907", "categories_tags": ["en:snacks", "en:sweet-snacks", "en:biscuits-and-cakes", "en:biscuits", "en:chocolate-biscuits"], "labels_tags": ["en:organic", "en:eu-organic"], "unique_scans_n": 12, "nova_groups": "3", 
"last_modified_t": 1701050992, "created_t": 1678120000, "nutriments": {"energy-kcal_100g": 505, "fiber_100g": 6, "salt_100g": 0.65, "sodium_100g": 0.26, "carbohydrates_100g": 57, "proteins_100g": 7.8, "sugars_100g": 22, "saturated-fat_100g": 13, "fat_100g": 26}, "completeness": 0.575, "product_name": "Finest chocolate shortbread", "product_name_en": "Finest chocolate shortbread", "product_name_fr": "Sabl\u00e9s de chocolat fins"} +{"code": "30123457678908", "categories_tags": ["en:beverages", "en:syrups", "en:sweetened-beverages"], "labels_tags": ["en:no-preservatives", "en:green-dot", "en:transformed-in-france"], "unique_scans_n": 5, "nova_groups": "4", "last_modified_t": 1644531894, "created_t": 1475310000, "nutriments": {"energy-kcal_100g": 330, "proteins_100g": 0.1, "carbohydrates_100g": 80, "sugars_100g": 79, "fat_100g": 0.1, "fiber_100g": 0.5, "saturated-fat_100g": 0.1, "salt_100g": 0.01, "energy-kj_100g": 1402, "sodium_100g": 0.004}, "completeness": 0.8874999999999998, "product_name": "Sirop de th\u00e9 p\u00eache", "product_name_fr": "Sirop de th\u00e9 p\u00eache"} +{"code": "30123457678909", "categories_tags": ["en:beverages", "en:carbonated-drinks", "en:sodas", "en:lemonade", "en:sweetened-beverages"], "labels_tags": ["en:no-added-sugar"], "unique_scans_n": 4, "nova_groups": "4", "last_modified_t": 1693247412, "created_t": 1533230000, "nutriments": {"proteins_100g": 0.83333333333333, "energy-kcal_100g": 45.833333333333, "sugars_100g": 9.5833333333333, "carbohydrates_100g": 10.833333333333, "salt_100g": 0, "saturated-fat_100g": 0, "fiber_100g": 0, "fat_100g": 0, "sodium_100g": 0}, "completeness": 0.475, "product_name": "Simply lemonade", "product_name_en": "Simply lemonade"} diff --git a/tests/int/data/test_off_data_update.jsonl b/tests/int/data/test_off_data_update.jsonl new file mode 100644 index 00000000..f2a61cbd --- /dev/null +++ b/tests/int/data/test_off_data_update.jsonl @@ -0,0 +1,2 @@ +{"code": "30123457678901", "categories_tags": 
["en:sweeteners", "en:sugars"], "unique_scans_n": 18, "nova_groups": "2", "last_modified_t": 1700545044, "created_t": 1537090000, "nutriments": {"fat_100g": 0, "energy-kcal_100g": 400, "proteins_100g": 0, "saturated-fat_100g": 0, "salt_100g": 0.1, "carbohydrates_100g": 100, "sugars_100g": 100, "sodium_100g": 0.001}, "completeness": 0.5874999999999999, "product_name": "Granulated sugar 2", "product_name_en": "Granulated sugar 2" , "product_name_fr": "Sucre semoule 2"} +{"code": "30123457678910", "categories_tags": ["en:plant-based-foods-and-beverages", "en:plant-based-foods", "en:fruits-and-vegetables-based-foods", "en:vegetables-based-foods", "en:dried-products", "en:dried-plant-based-foods", "en:tomatoes-and-their-products", "en:dried-vegetables", "en:dried-tomatoes", "fr:tortellini-pesto-rosso"], "labels_tags": ["en:no-artificial-flavors", "en:no-preservatives", "en:nutriscore", "en:nutriscore-grade-a"], "unique_scans_n": 14, "last_modified_t": 1682531455, "created_t": 1651170000, "nutriments": {"carbohydrates_100g": 41, "proteins_100g": 11, "saturated-fat_100g": 2.6, "sugars_100g": 6, "fat_100g": 10, "energy-kj_100g": 1303, "energy-kcal_100g": 310, "fiber_100g": 6.1, "salt_100g": 0.65, "sodium_100g": 0.26}, "completeness": 0.6499999999999999, "product_name": "Tortellini pesto Verde", "product_name_it": "Tortellini pesto Verde"} diff --git a/tests/int/data_generation.py b/tests/int/data_generation.py new file mode 100644 index 00000000..5447d0b3 --- /dev/null +++ b/tests/int/data_generation.py @@ -0,0 +1,152 @@ +import hashlib +import json +import tempfile + +import factory + +from app._import import ( + get_alias, + perform_refresh_synonyms, + perform_taxonomy_import, + run_items_import, + update_alias, +) + + +class Nutriments(factory.DictFactory): + fat_100g = 0 + energy_kcal_100g = 400 + proteins_100g = 0 + saturated_fat_100g = 0 + salt_100g = 0 + carbohydrates_100g = 100 + sugars_100g = 100 + sodium_100g = 0 + + class Meta: + # Ensure dict field uses hyphens 
not underscores. + rename = { + "energy_kcal_100g": "energy-kcal_100g", + "saturated_fat_100g": "saturated-fat_100g", + } + + +class Product(factory.DictFactory): + code = factory.Sequence(lambda n: f"30123457{n:05d}") + categories_tags = ["en:sweeteners", "en:sugars", "en:granulated-sugars"] + labels_tags = ["en:no-lactose"] + unique_scans_n = 15 + nova_groups = "2" + last_modified_t = 1700525044 + created_t = 1537090000 + completeness = 0.5874999999999999 + product_name_en = "Granulated sugar" + product_name_fr = "Sucre semoule" + lc = "en" + product_name = factory.LazyAttribute( + lambda o: getattr(o, "product_name_" + getattr(o, "lc")) + ) + nutriments = factory.SubFactory(Nutriments) + + +# we keep track of ingested, +# associating data to a sha256 hash with index name +INGESTED_DATA: dict[str, str] = {} + + +def ingest_data(data, index_name, index_config, es_connection, read_only=True): + """Ingest test data into ES + + Try to re-use existing index if possible + """ + data_sha256 = ( + index_name + hashlib.sha256(json.dumps(data).encode("utf-8")).hexdigest() + ) + if data_sha256 not in INGESTED_DATA or not _index_exists( + es_connection, INGESTED_DATA[data_sha256] + ): + # ingest data + with tempfile.NamedTemporaryFile("w+t", suffix=".jsonl") as f: + f.write("\n".join(json.dumps(d) for d in data)) + f.flush() + num_errors = run_items_import( + f.name, + 1, + index_config, + skip_updates=True, + partial=False, + ) + if num_errors: + raise RuntimeError(f"{num_errors} errors while ingesting data") + # remember the alias for later use + INGESTED_DATA[data_sha256] = get_alias(es_connection, index_name) + real_index = INGESTED_DATA[data_sha256] + if not read_only: + # clone the index because we will modify it + es_connection.indices.clone( + source=real_index, + target=real_index + "-clone", + wait_for_completion=True, + ) + real_index = real_index + "-clone" + # now just update the alias to point to the known index + update_alias(es_connection, real_index, 
index_name) + return real_index + + +INGESTED_TAXONOMY: dict[str, str] = {} + + +def _index_exists(es_connection, name): + return es_connection.indices.exists(index=name) + + +def ingest_taxonomies(index_id, index_config, es_connection): + """Ingest taxonomies into ES""" + if index_id not in INGESTED_TAXONOMY or not _index_exists( + es_connection, INGESTED_TAXONOMY[index_id] + ): + perform_taxonomy_import(index_config) + perform_refresh_synonyms( + index_id, + index_config, + ) + INGESTED_TAXONOMY[index_id] = get_alias( + es_connection, index_config.taxonomy.index.name + ) + else: + # just update the alias to point to the good index + update_alias( + es_connection, INGESTED_TAXONOMY[index_id], index_config.taxonomy.index.name + ) + + +def save_state(index_id, index_config, es_connection): + """Save state in a particular index in ES, + so that even between test run, we minimize the number of ingestion + """ + es_connection.index( + index="test-" + index_id, + id="state", + document={ + "ingested_taxonomies": INGESTED_TAXONOMY, + "ingested_data": INGESTED_DATA, + }, + ) + + +def load_state(index_id, index_config, es_connection): + state_idx = "test-" + index_id + if es_connection.exists(index=state_idx, id="state"): + state = es_connection.get(index=state_idx, id="state")["_source"] + INGESTED_DATA.update(state["ingested_data"]) + INGESTED_TAXONOMY.update(state["ingested_taxonomies"]) + + +def delete_es_indices(es_connection): + """Do a full cleanup of ES, including deleting all indexes""" + for index in es_connection.indices.get(index="*", expand_wildcards="all"): + if index.startswith(".") and not index.startswith("test-"): + # skip special indexes + continue + es_connection.indices.delete(index=index) diff --git a/tests/int/helpers.py b/tests/int/helpers.py new file mode 100644 index 00000000..983811c6 --- /dev/null +++ b/tests/int/helpers.py @@ -0,0 +1,74 @@ +import glob +import json +import os + +from app._import import BaseDocumentFetcher +from app._types 
import FetcherResult, FetcherStatus, JSONType +from app.indexing import BaseDocumentPreprocessor +from app.postprocessing import BaseResultProcessor + + +class CallRegistration: + """A class to register calls + + Because search-a-licious may launch different process + We need to register them in a persistent way + """ + + _fname = None + + @classmethod + def register_call(cls, *args): + if cls._fname is None: + cls._fname = f"/tmp/{cls.__name__}-{os.getpid()}.jsonl" + with open(cls._fname, "a") as f: + f.write(json.dumps(args if len(args) > 1 else args[0]) + "\n") + + @classmethod + def clean_calls(cls): + for fpath in glob.glob(f"/tmp/{cls.__name__}-*.jsonl"): + os.remove(fpath) + + @classmethod + def get_calls(cls): + # get all calls, that might have been made in other processes + calls = [ + json.loads(line) + for fpath in glob.glob(f"/tmp/{cls.__name__}-*.jsonl") + for line in open(fpath) + if line.strip() + ] + return calls + + +class TestDocumentFetcher(BaseDocumentFetcher, CallRegistration): + + def fetch_document(self, stream_name: str, item: JSONType) -> FetcherResult: + self.register_call(item) + return FetcherResult( + status=FetcherStatus.FOUND, + document=item, + ) + + +class TestDocumentPreprocessor(BaseDocumentPreprocessor, CallRegistration): + + def preprocess(self, document: JSONType) -> FetcherResult: + self.register_call(document) + # FIXME if we could specify sub object field we would not need this + # but for now we want tests to pass + # ensure floats + for key, value in list(document.get("nutriments", {}).items()): + if value is not None: + document["nutriments"][key] = float(value) + return FetcherResult( + status=FetcherStatus.FOUND, + document=document, + ) + + +class TestResultProcessor(BaseResultProcessor, CallRegistration): + + def process_after(self, result: JSONType) -> JSONType: + self.register_call(result) + return result diff --git a/tests/int/test_analyze.py b/tests/int/test_analyze.py new file mode 100644 index 00000000..4de81d45 
--- /dev/null +++ b/tests/int/test_analyze.py @@ -0,0 +1,114 @@ +"""Some tests on analyzer + +Those are placed as integration test because we want to test against Elasticsearch +from the analyzers built by search-a-licious + +For explanations on what we our testing here, +see https://openfoodfacts.github.io/search-a-licious/users/explain-taxonomies +""" + +import pytest + +from app.utils.analyzers import ( + get_taxonomy_indexing_analyzer, + get_taxonomy_search_analyzer, +) + + +def _tokens(result): + return [part["token"] for part in result["tokens"]] + + +def test_taxonomy_indexing_analyzer(es_connection, data_ingester): + # create the index, with synonyms + data_ingester([]) + index_en = get_taxonomy_indexing_analyzer("labels", "en").to_dict() + index_fr = get_taxonomy_indexing_analyzer("labels", "fr").to_dict() + # no change for simple entries + result = es_connection.indices.analyze( + index="test_off", + analyzer=index_en, + text="en:organic", + ) + assert _tokens(result) == ["en:organic"] + + # the hyphen is replaced by underscore + result = es_connection.indices.analyze( + index="test_off", + analyzer=index_en, + text="en:organic-farming_2", + ) + assert _tokens(result) == ["en:organic_farming_2"] + # whatever the language + result = es_connection.indices.analyze( + index="test_off", + analyzer=index_fr, + text="en:organic-farming_2", + ) + assert _tokens(result) == ["en:organic_farming_2"] + + +def test_taxonomy_search_analyzer(es_connection, data_ingester): + # create the index, with synonyms + data_ingester([]) + search_en = get_taxonomy_search_analyzer("labels", "en", True).to_dict() + search_fr = get_taxonomy_search_analyzer("labels", "fr", True).to_dict() + # bare term is not changed, but hyphen is replaced by underscore + for analyzer in [search_en, search_fr]: + result = es_connection.indices.analyze( + index="test_off", + analyzer=analyzer, + text="en:organic-farming_2", + ) + assert _tokens(result) == ["en:organic_farming_2"] + + # synonym is 
replaced by the synonym + result = es_connection.indices.analyze( + index="test_off", + analyzer=search_en, + text="organically grown plants", + ) + assert "en:organic" in _tokens(result) + # with hyphen to underscore + result = es_connection.indices.analyze( + index="test_off", + analyzer=search_en, + text="european leaf", + ) + assert _tokens(result) == ["en:eu_organic"] + # french synonyms + result = es_connection.indices.analyze( + index="test_off", + analyzer=search_fr, + text="feuille bio", + ) + assert _tokens(result) == ["en:eu_organic"] + # quote handling + result = es_connection.indices.analyze( + index="test_off", + analyzer=search_fr, + text="l'agriculture", + ) + assert _tokens(result) == ["l", "agriculture"] + result = es_connection.indices.analyze( + index="test_off", + analyzer=search_fr, + text="issue de l'agriculture biologique", + ) + assert _tokens(result) == ["en:organic"] + + +@pytest.mark.xfail(reason="No stop words support yet") +def test_taxonomy_search_analyzer_stopwords(es_connection, data_ingester): + # create the index, with synonyms + data_ingester([]) + search_fr = get_taxonomy_search_analyzer("labels", "fr", True).to_dict() + + # simple stop words taken into account + result = es_connection.indices.analyze( + index="test_off", + analyzer=search_fr, + # en ignored as well as "de l'" in target synonym + text="issue en agriculture biologique", + ) + assert _tokens(result) == ["en:eu_organic"] diff --git a/tests/int/test_import_data.py b/tests/int/test_import_data.py new file mode 100644 index 00000000..46d06cd4 --- /dev/null +++ b/tests/int/test_import_data.py @@ -0,0 +1,160 @@ +from tests.cli_utils import runner_invoke +from tests.int import helpers + + +def test_import_data(test_off_config, es_connection, synonyms_created): + helpers.TestDocumentFetcher.clean_calls() + helpers.TestDocumentPreprocessor.clean_calls() + # Important note: we use multiprocessing which is not compatible with pytest logging + # So you may loose information 
about errors + result = runner_invoke( + "import", "/opt/search/tests/int/data/test_off_data.jsonl", "--skip-updates" + ) + assert result.exit_code == 0 + assert "Import time" in result.stderr + # no fetch + fetcher_calls = helpers.TestDocumentFetcher.get_calls() + assert len(fetcher_calls) == 0 + # pre-processor was called for each document + pre_processor_calls = helpers.TestDocumentPreprocessor.get_calls() + assert len(pre_processor_calls) == 9 + assert set(item["code"] for item in pre_processor_calls) == set( + f"3012345767890{i}" for i in range(1, 10) + ) + # assert we got the index with the right alias + aliases = es_connection.indices.get_alias(index="test_off") + assert len(aliases) == 1 + index_name = list(aliases.keys())[0] + assert index_name.startswith("test_off-") + assert aliases[index_name]["aliases"] == {"test_off": {}} + # and the right number of entries + assert es_connection.count(index="test_off")["count"] == 9 + # test on one entry + document = es_connection.get(index="test_off", id="30123457678901")["_source"] + last_index_1 = document.pop("last_indexed_datetime") + assert last_index_1 + assert document == { + "code": "30123457678901", + "product_name": { + "main": "Granulated sugar", + "en": "Granulated sugar", + "fr": "Sucre semoule", + }, + "categories": ["en:sweeteners", "en:sugars", "en:granulated-sugars"], + "labels": ["en:no-lactose"], + "unique_scans_n": 15, + "nova_groups": "2", + "last_modified_t": 1700525044, + "created_t": 1537090000, + "nutriments": { + "fat_100g": 0.0, + "energy-kcal_100g": 400.0, + "proteins_100g": 0.0, + "saturated-fat_100g": 0.0, + "salt_100g": 0.0, + "carbohydrates_100g": 100.0, + "sugars_100g": 100.0, + "sodium_100g": 0.0, + }, + "completeness": 0.5874999999999999, + } + + # we now import data again, to verify it creates a new index + old_index_name = index_name + result = runner_invoke( + "import", "/opt/search/tests/int/data/test_off_data.jsonl", "--skip-updates" + ) + assert result.exit_code == 0 + # 
assert we got the index with the right alias + aliases = es_connection.indices.get_alias(index="test_off") + assert len(aliases) == 1 + index_name = list(aliases.keys())[0] + assert index_name != old_index_name + # entries are there + assert es_connection.count(index="test_off")["count"] == 9 + + # test in place update + helpers.TestDocumentPreprocessor.clean_calls() + old_index_name = index_name + result = runner_invoke( + "import", + "/opt/search/tests/int/data/test_off_data_update.jsonl", + "--partial", + "--skip-updates", + ) + assert result.exit_code == 0 + assert "Import time" in result.stderr + # pre-processor was called for each document + pre_processor_calls = helpers.TestDocumentPreprocessor.get_calls() + assert len(pre_processor_calls) == 2 + # alias is still the same + aliases = es_connection.indices.get_alias(index="test_off") + assert len(aliases) == 1 + index_name = list(aliases.keys())[0] + assert index_name == old_index_name + # old and new entries are there + assert es_connection.count(index="test_off")["count"] == 10 + # test our modified one entry + document = es_connection.get(index="test_off", id="30123457678901")["_source"] + assert document.pop("last_indexed_datetime") > last_index_1 + assert document == { + "code": "30123457678901", + "product_name": { + "main": "Granulated sugar 2", + "en": "Granulated sugar 2", + "fr": "Sucre semoule 2", + }, + "categories": ["en:sweeteners", "en:sugars"], + "unique_scans_n": 18, + "nova_groups": "2", + "last_modified_t": 1700545044, + "created_t": 1537090000, + "nutriments": { + "fat_100g": 0.0, + "energy-kcal_100g": 400.0, + "proteins_100g": 0.0, + "saturated-fat_100g": 0.0, + "salt_100g": 0.1, + "carbohydrates_100g": 100.0, + "sugars_100g": 100.0, + "sodium_100g": 0.001, + }, + "completeness": 0.5874999999999999, + } + # our new document is there + assert es_connection.get(index="test_off", id="30123457678910")["_source"] + + +def test_cleanup_indexes(test_off_config, es_connection): + # clean ES first 
+ for index_name in es_connection.indices.get(index="*").keys(): + es_connection.indices.delete(index=index_name) + # create some indices corresponding to test_off, with last aliased + es_connection.indices.create(index="test_off-2024-05-25") + es_connection.indices.create(index="test_off-2024-06-25") + es_connection.indices.create(index="test_off-2024-07-25", aliases={"test_off": {}}) + # same for taxonomies + es_connection.indices.create(index="test_off_taxonomy-2024-05-25") + es_connection.indices.create(index="test_off_taxonomy-2024-06-25") + es_connection.indices.create( + index="test_off_taxonomy-2024-07-25", aliases={"test_off_taxonomy": {}} + ) + # and some unrelated indexes + es_connection.indices.create(index="something-else-2024-05-25") + es_connection.indices.create( + index="something-else-2024-07-25", aliases={"something-else": {}} + ) + + # run the cleanup + result = runner_invoke("cleanup-indexes") + assert result.exit_code == 0 + # assert we got the index with the right alias, and other indexes untouched + aliases = es_connection.indices.get_alias(index="*") + assert dict(aliases) == { + # only aliases where kept + "test_off_taxonomy-2024-07-25": {"aliases": {"test_off_taxonomy": {}}}, + "test_off-2024-07-25": {"aliases": {"test_off": {}}}, + # other untouched + "something-else-2024-05-25": {"aliases": {}}, + "something-else-2024-07-25": {"aliases": {"something-else": {}}}, + } diff --git a/tests/int/test_import_taxonomies.py b/tests/int/test_import_taxonomies.py new file mode 100644 index 00000000..7d939477 --- /dev/null +++ b/tests/int/test_import_taxonomies.py @@ -0,0 +1,32 @@ +import glob + +from tests.cli_utils import runner_invoke + + +def test_import_taxonomies(test_off_config, es_connection): + result = runner_invoke("import-taxonomies") + assert result.exit_code == 0 + assert "Import time" in result.stderr + assert "Synonyms generation time" in result.stderr + # assert we got the index with the right alias + aliases = 
es_connection.indices.get_alias(index="test_off_taxonomy") + assert len(aliases) == 1 + index_name = list(aliases.keys())[0] + assert index_name.startswith("test_off_taxonomy-") + assert aliases[index_name]["aliases"] == {"test_off_taxonomy": {}} + # and the right number of entries, 33 categories and 20 labels + assert es_connection.count(index="test_off_taxonomy")["count"] == 20 + 33 + # assert synonyms files gets generated + assert sorted(glob.glob("/opt/search/synonyms/*/*")) == [ + "/opt/search/synonyms/categories/en.txt", + "/opt/search/synonyms/categories/fr.txt", + "/opt/search/synonyms/categories/main.txt", + "/opt/search/synonyms/labels/en.txt", + "/opt/search/synonyms/labels/fr.txt", + "/opt/search/synonyms/labels/main.txt", + ] + # with right format + assert ( + "beverages with added sugar,sugared beverages,sweetened beverages => en:sweetened-beverages\n" + in open("/opt/search/synonyms/categories/en.txt") + ) diff --git a/tests/int/test_search.py b/tests/int/test_search.py new file mode 100644 index 00000000..92ee7460 --- /dev/null +++ b/tests/int/test_search.py @@ -0,0 +1,454 @@ +from typing import Any, Literal + +import pytest +from fastapi import Response + +from .data_generation import Product + + +def search_sample(): + # category granulated sugars has synonyms white sugars / refined sugars + # and in french saccharose, saccarose, sucre blanc, suche de table + # label organic has synonyms organically grown / organically produced + # / from organic farming + # and french bio / biologique / issu de l'agriculture biologique + + # some brown sugar nutrients + brown_sugar_nutriments = dict( + nutriments__energy_kcal_100g=350, + nutriments__carbohydrates_100g=90, + nutriments__sugars_100g=90, + nutriments__proteins_100g=1.5, + ) + data = [ + # some sugar + Product( + code="3012345670001", + product_name_en="Main Granulated Sugar", + product_name_fr="Sucre semoule principal", + categories_tags=["en:sweeteners", "en:sugars", "en:granulated-sugars"], + 
labels_tags=["en:no-lactose", "en:organic"], + ), + Product( + code="3012345670002", + product_name_en="Organic Granulated Sugar", + product_name_fr="Sucre semoule bio", + categories_tags=["en:sweeteners", "en:sugars", "en:granulated-sugars"], + labels_tags=["en:organic"], + ), + Product( + code="3012345670003", + product_name_en="No Lactose Granulated Sugar", + product_name_fr=None, + categories_tags=["en:sweeteners", "en:sugars", "en:granulated-sugars"], + labels_tags=["en:no-lactose"], + ), + Product( + code="3012345670004", + product_name_en="No label Granulated Sugar", + product_name_fr=None, + categories_tags=["en:sweeteners", "en:sugars", "en:granulated-sugars"], + labels_tags=None, + ), + Product( + code="3012345670005", + product_name_en="Organic Brown Sugar", + product_name_fr="Sucre roux bio", + categories_tags=["en:sweeteners", "en:sugars", "en:brown-sugars"], + labels_tags=["en:organic"], + **brown_sugar_nutriments, + ), + Product( + code="3012345670006", + product_name_en="Brown Sugar", + product_name_fr="Sucre roux", + categories_tags=["en:sweeteners", "en:sugars", "en:brown-sugars"], + labels_tags=[], + **brown_sugar_nutriments, + ), + ] + # make created_t, modified_t and unique_scans_n predictable for sorting + created_t = 1537090000 + modified_t = 1700525044 + day_in_second = 86400 + for i, product in enumerate(data): + product["created_t"] = created_t - i * day_in_second + product["last_modified_t"] = modified_t + i * day_in_second + product["unique_scans_n"] = (i + 1) * 100 + return data + + +@pytest.fixture +def sample_data(data_ingester): + data = search_sample() + data_ingester(data) + yield data + + +def hits_attr(data, name): + """small utility to list attrs""" + return [product[name] for product in data["hits"]] + + +def list_of_dict_to_comparable(list_of_dicts) -> set[tuple[tuple[Any]]]: + """We want to compare lists of dict without taking order into account, + but they are not hashable + + transform to set of tuples of tuples + """ + 
return set(tuple(sorted(dict_.items())) for dict_ in list_of_dicts) + + +GetType = Literal["GET"] +PostType = Literal["POST"] +GetOrPostType = GetType | PostType +GET_POST: list[GetOrPostType] = ["GET", "POST"] + + +def do_search( + test_client, req_type: GetOrPostType, params: dict[str, Any], code=200 +) -> tuple[Response, dict[str, Any]]: + if req_type == "GET": + # eventually transform list[str] to str + for field in ("langs", "fields", "debug_info", "facets", "charts"): + if isinstance(params.get(field), list): + params[field] = ",".join(params[field]) + if params.get("boost_phrase"): + params["boost_phrase"] = "1" + resp = test_client.get("/search", params=params) + else: + resp = test_client.post("/search", json=params) + assert resp.status_code == code + return resp, resp.json() + + +@pytest.mark.parametrize("req_type", GET_POST) +def test_search_all(req_type, sample_data, test_client): + _, data = do_search(test_client, req_type, {"sort_by": "unique_scans_n"}) + # all products + assert data["count"] == 6 + assert len(data["hits"]) == 6 + # no duplicates + assert len(set(hits_attr(data, "code"))) == 6 + # sorted ok + assert hits_attr(data, "unique_scans_n") == list(range(100, 700, 100)) + + +@pytest.mark.parametrize("req_type", GET_POST) +def test_search_sort_by_created_t(req_type, sample_data, test_client): + _, data = do_search(test_client, req_type, {"sort_by": "created_t"}) + # all products + assert data["count"] == 6 + assert len(data["hits"]) == 6 + # no duplicates + assert len(set(hits_attr(data, "code"))) == 6 + # sorted ok + created_t = hits_attr(data, "created_t") + assert sorted(created_t) == created_t + + # reverse sort + _, data = do_search(test_client, req_type, {"sort_by": "-created_t"}) + # all products + assert data["count"] == 6 + # sorted ok + created_t = hits_attr(data, "created_t") + assert sorted(created_t) == list(reversed(created_t)) + + +ALL_CODES = [s["code"] for s in search_sample()] +ORGANIC_CODES = ["3012345670001", 
"3012345670002", "3012345670005"] +NO_LACTOSE_CODES = ["3012345670001", "3012345670003"] +BROWN_SUGAR_CODES = ["3012345670005", "3012345670006"] + + +def xfail_param(*args): + return pytest.param(*args, marks=pytest.mark.xfail) + + +@pytest.mark.parametrize("req_type", GET_POST) +@pytest.mark.parametrize( + "req,codes", + [ + # empty string query is not a problem + ({"q": "", "sort_by": "created_t"}, ALL_CODES), + # simple queries + ({"q": "sugar"}, ALL_CODES), + ({"q": "brown"}, ["3012345670005", "3012345670006"]), + # this also searches in labels + ({"q": "organic"}, ORGANIC_CODES), + # synonym of label organic, will work only if we boost phrase + ({"q": "organically grown", "boost_phrase": True}, ORGANIC_CODES), + # also works for translations + ({"q": "bio", "langs": ["fr"]}, ORGANIC_CODES), + ({"q": "bio", "langs": ["en,fr"]}, ORGANIC_CODES), + # with more terms this does not work, yet, see + xfail_param( + {"q": "organically grown plants", "boost_phrase": True}, ORGANIC_CODES + ), + # as phrase + ({"q": '"organically grown"'}, ORGANIC_CODES), + ( + {"q": '"issu de l\'agriculture biologique"', "langs": ["fr"]}, + ORGANIC_CODES, + ), + # handling of '-' + ({"q": 'labels:"en:no-lactose"', "langs": ["fr"]}, NO_LACTOSE_CODES), + # synonyms on label field + ({"q": 'labels:"organically grown"'}, ORGANIC_CODES), + # search a field + ({"q": "product_name:brown sugar"}, BROWN_SUGAR_CODES), + ({"q": 'product_name:"brown sugar"'}, BROWN_SUGAR_CODES), + ({"q": "product_name:Sucre roux", "langs": ["fr"]}, BROWN_SUGAR_CODES), + ({"q": 'product_name:"Sucre roux"', "langs": ["fr"]}, BROWN_SUGAR_CODES), + # search in multiple fields + ({"q": '"brown sugar" organic'}, ["3012345670005"]), + # search can use main language as fallback + ({"q": "Lactose", "langs": ["fr", "main"]}, ["3012345670003"]), + ({"q": "product_name:Lactose", "langs": ["fr", "main"]}, ["3012345670003"]), + ( + {"q": '"No Lactose Granulated Sugar"', "langs": ["fr", "main"]}, + ["3012345670003"], + ), + # 
without main fallback, no result + ({"q": "Lactose", "langs": ["fr"]}, []), + ], +) +def test_search_full_text(req_type, req, codes, sample_data, test_client): + _, data = do_search(test_client, req_type, req) + assert set(hits_attr(data, "code")) == set(codes) + + +def test_extra_params_rejected(test_client): + # lang instead of langs + resp = test_client.get("/search?sort_by=created_t&lang=fr") + assert resp.status_code == 422 + assert resp.json() == { + "detail": [ + { + "type": "extra_forbidden", + "loc": ["query", "lang"], + "msg": "Extra inputs are not permitted", + "input": "fr", + } + ] + } + resp = test_client.post("/search", json=dict(sort_by="created_t", lang="fr")) + assert resp.status_code == 422 + assert resp.json() == { + "detail": [ + { + "type": "extra_forbidden", + "loc": ["body", "lang"], + "msg": "Extra inputs are not permitted", + "input": "fr", + } + ] + } + + +@pytest.mark.parametrize( + "req_type,charts", + [ + ("GET", "categories,labels,unique_scans_n:completeness"), + ( + "POST", + [ + {"type": "DistributionChart", "field": "categories"}, + {"type": "DistributionChart", "field": "labels"}, + {"type": "ScatterChart", "x": "unique_scans_n", "y": "completeness"}, + ], + ), + ], +) +def test_simple_charts(req_type, charts, sample_data, test_client): + params = {"sort_by": "created_t", "langs": ["en"], "charts": charts} + _, data = do_search(test_client, req_type, params) + # does not alter search results + assert data["count"] == 6 + assert set(hits_attr(data, "code")) == set(ALL_CODES) + # charts are there + charts = data["charts"] + assert set(charts.keys()) == set( + ["categories", "labels", "unique_scans_n:completeness"] + ) + assert set(c["title"] for c in charts.values()) == set( + ["categories", "labels", "unique_scans_n x completeness"] + ) + assert charts["categories"]["data"][0]["values"] == [ + {"category": "en:brown-sugars", "amount": 2}, + {"category": "en:granulated-sugars", "amount": 4}, + {"category": "en:sugars", "amount": 6}, 
+ {"category": "en:sweeteners", "amount": 6}, + ] + assert charts["labels"]["data"][0]["values"] == [ + {"category": "en:no-lactose", "amount": 2}, + {"category": "en:organic", "amount": 3}, + ] + assert list_of_dict_to_comparable( + charts["unique_scans_n:completeness"]["data"][0]["values"] + ) == list_of_dict_to_comparable( + [ + {"unique_scans_n": 600, "completeness": 0.5874999999999999}, + {"unique_scans_n": 500, "completeness": 0.5874999999999999}, + {"unique_scans_n": 400, "completeness": 0.5874999999999999}, + {"unique_scans_n": 300, "completeness": 0.5874999999999999}, + {"unique_scans_n": 200, "completeness": 0.5874999999999999}, + {"unique_scans_n": 100, "completeness": 0.5874999999999999}, + ] + ) + + +def test_charts_bad_fields_fails(test_client): + # non existing in distribution chart + params = {"sort_by": "created_t", "langs": ["en"]} + resp, _ = do_search( + test_client, "GET", dict(params, charts=["non_existing_field"]), code=422 + ) + assert "Unknown field name in facets/charts" in resp.text + assert "non_existing_field" in resp.text + # non agg in distribution chart + resp, _ = do_search( + test_client, "GET", dict(params, charts=["unique_scans_n"]), code=422 + ) + assert "Non aggregation field name in facets/charts" in resp.text + assert "unique_scans_n" in resp.text + # non numeric in scatter chart + resp, _ = do_search( + test_client, "GET", dict(params, charts=["labels:categories"]), code=422 + ) + assert "Non numeric field name" in resp.text + assert "labels" in resp.text + assert "categories" in resp.text + # non existing in scatter chart + resp, _ = do_search( + test_client, + "GET", + dict(params, charts=["non_existing_field:unique_scans_n"]), + code=422, + ) + assert "Unknown field name" in resp.text + assert "non_existing_field" in resp.text + + +@pytest.mark.parametrize("req_type", GET_POST) +def test_multi_lang(req_type, sample_data, test_client): + _, data = do_search(test_client, req_type, {"q": "roux", "langs": ["en", "fr"]}) + 
assert set(hits_attr(data, "code")) == set(BROWN_SUGAR_CODES) + _, data = do_search( + test_client, req_type, {"q": "product_name:roux", "langs": ["en", "fr"]} + ) + assert set(hits_attr(data, "code")) == set(BROWN_SUGAR_CODES) + + +@pytest.mark.parametrize("req_type", GET_POST) +def test_simple_facets(req_type, sample_data, test_client): + params = { + "sort_by": "created_t", + "langs": ["en"], + "facets": ["labels", "categories"], + } + _, data = do_search(test_client, req_type, params) + # does not alter search results + assert data["count"] == 6 + assert set(hits_attr(data, "code")) == set(ALL_CODES) + # facets are there + facets = data["facets"] + assert set(facets.keys()) == {"labels", "categories"} + assert list_of_dict_to_comparable( + facets["labels"]["items"] + ) == list_of_dict_to_comparable( + [ + {"key": "en:organic", "name": "Organic", "count": 3, "selected": False}, + { + "key": "en:no-lactose", + "name": "No lactose", + "count": 2, + "selected": False, + }, + ] + ) + assert list_of_dict_to_comparable( + facets["categories"]["items"] + ) == list_of_dict_to_comparable( + [ + {"key": "en:sugars", "name": "Sugars", "count": 6, "selected": False}, + { + "key": "en:sweeteners", + "name": "Sweeteners", + "count": 6, + "selected": False, + }, + { + "key": "en:granulated-sugars", + "name": "Granulated sugars", + "count": 4, + "selected": False, + }, + { + "key": "en:brown-sugars", + "name": "en:brown-sugars", + "count": 2, + "selected": False, + }, + ] + ) + + +@pytest.mark.parametrize("req_type", GET_POST) +def test_facets_bad_fields_fails(req_type, test_client): + params = {"sort_by": "created_t", "langs": ["en"]} + # non existing field + resp, _ = do_search( + test_client, req_type, dict(params, facets=["non_existing_field"]), code=422 + ) + assert "Unknown field name in facets/charts" in resp.text + assert "non_existing_field" in resp.text + # non agg field + resp, _ = do_search( + test_client, req_type, dict(params, facets=["unique_scans_n"]), code=422 
+ ) + assert "Non aggregation field name in facets/charts" in resp.text + assert "unique_scans_n" in resp.text + + +@pytest.mark.parametrize("req_type", GET_POST) +def test_pagination(req_type, sample_data, test_client): + params = {"sort_by": "code", "langs": ["en"]} + _, data = do_search(test_client, req_type, dict(params, page_size=2)) + assert data["count"] == 6 + assert data["page_size"] == 2 + assert data["page_count"] == 3 + assert hits_attr(data, "code") == ALL_CODES[:2] + _, data = do_search(test_client, req_type, dict(params, page_size=2, page=3)) + assert hits_attr(data, "code") == ALL_CODES[4:] + # uneven end + _, data = do_search(test_client, req_type, dict(params, page_size=5, page=2)) + assert data["page_count"] == 2 + assert hits_attr(data, "code") == ALL_CODES[-1:] + # out of range + _, data = do_search(test_client, req_type, dict(params, page_size=2, page=20)) + assert hits_attr(data, "code") == [] + + +@pytest.mark.parametrize("req_type", GET_POST) +def test_debug_infos(req_type, sample_data, test_client): + params = { + "q": "categories:organic", + "langs": ["en"], + "facets": ["labels"], + "debug_info": ["es_query", "lucene_query", "aggregations"], + } + _, data = do_search(test_client, req_type, params) + assert set(data["debug"].keys()) == {"lucene_query", "es_query", "aggregations"} + + +@pytest.mark.parametrize("req_type", GET_POST) +def test_fields(req_type, sample_data, test_client): + params = {"sort_by": "code", "langs": ["en"], "fields": ["code", "product_name"]} + _, data = do_search(test_client, req_type, params) + assert data["count"] == 6 + assert hits_attr(data, "code") == ALL_CODES + # we only get code and product_name + assert set( + attributes for result in data["hits"] for attributes in result.keys() + ) == {"code", "product_name"} diff --git a/tests/unit/data/complex_query.json b/tests/unit/data/complex_query.json index b61673a9..d9983c63 100644 --- a/tests/unit/data/complex_query.json +++ b/tests/unit/data/complex_query.json 
@@ -1,106 +1,115 @@ { - "text_query": "bacon de boeuf (countries_tags:\"en:italy\" AND (categories_tags:\"en:beef\" AND (nutriments.salt_100g:[2 TO *] OR nutriments.salt_100g:[0 TO 0.05])))", - "luqum_tree": "bacon de boeuf (countries_tags:\"en:italy\" AND (categories_tags:\"en:beef\" AND (nutriments.salt_100g:[2 TO *] OR nutriments.salt_100g:[0 TO 0.05])))", + "text_query": "bacon de boeuf (countries:italy AND (categories:\"en:beef\" AND (nutriments.salt_100g:[2 TO *] OR nutriments.salt_100g:[0 TO 0.05])))", + "luqum_tree": "((bacon AND de AND boeuf) OR \"bacon de boeuf\"^2) AND (countries.en:italy AND (categories.en:\"en:beef\" AND (nutriments.salt_100g:[2 TO *] OR nutriments.salt_100g:[0 TO 0.05])))", "es_query": { "query": { "bool": { - "should": [ + "must": [ { - "match_phrase": { - "product_name.en": { - "query": "bacon de boeuf", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "generic_name.en": { - "query": "bacon de boeuf", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "categories.en": { - "query": "bacon de boeuf", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "labels.en": { - "query": "bacon de boeuf", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "brands": { - "query": "bacon de boeuf", - "boost": 2.0 - } - } - }, - { - "multi_match": { - "query": "bacon de boeuf", - "fields": [ - "product_name.en", - "generic_name.en", - "categories.en", - "labels.en", - "brands" + "bool": { + "should": [ + { + "bool": { + "must": [ + { + "multi_match": { + "query": "bacon", + "zero_terms_query": "all", + "fields": [ + "product_name.en", + "generic_name.en", + "categories.en", + "labels.en", + "brands.en" + ], + "type": "best_fields" + } + }, + { + "multi_match": { + "query": "de", + "zero_terms_query": "all", + "fields": [ + "product_name.en", + "generic_name.en", + "categories.en", + "labels.en", + "brands.en" + ], + "type": "best_fields" + } + }, + { + "multi_match": { + "query": "boeuf", + "zero_terms_query": "all", + "fields": [ + 
"product_name.en", + "generic_name.en", + "categories.en", + "labels.en", + "brands.en" + ], + "type": "best_fields" + } + } + ] + } + }, + { + "multi_match": { + "boost": 2.0, + "query": "bacon de boeuf", + "fields": [ + "product_name.en", + "generic_name.en", + "categories.en", + "labels.en", + "brands.en" + ], + "type": "phrase" + } + } ] } - } - ], - "filter": [ + }, { "bool": { "must": [ + { + "match": { + "countries.en": { + "query": "italy", + "zero_terms_query": "all" + } + } + }, { "bool": { "must": [ { - "term": { - "countries_tags": { - "value": "en:italy" + "match_phrase": { + "categories.en": { + "query": "en:beef" } } }, { "bool": { - "must": [ + "should": [ { - "term": { - "categories_tags": { - "value": "en:beef" + "range": { + "nutriments.salt_100g": { + "gte": "2" } } }, { - "bool": { - "should": [ - { - "range": { - "nutriments.salt_100g": { - "gte": "2" - } - } - }, - { - "range": { - "nutriments.salt_100g": { - "lte": "0.05", - "gte": "0" - } - } - } - ] + "range": { + "nutriments.salt_100g": { + "lte": "0.05", + "gte": "0" + } } } ] @@ -112,66 +121,11 @@ ] } } - ], - "minimum_should_match": 1 + ] } }, "size": 25, "from": 25 }, - "fulltext": "bacon de boeuf", - "filter_query": { - "bool": { - "must": [ - { - "bool": { - "must": [ - { - "term": { - "countries_tags": { - "value": "en:italy" - } - } - }, - { - "bool": { - "must": [ - { - "term": { - "categories_tags": { - "value": "en:beef" - } - } - }, - { - "bool": { - "should": [ - { - "range": { - "nutriments.salt_100g": { - "gte": "2" - } - } - }, - { - "range": { - "nutriments.salt_100g": { - "lte": "0.05", - "gte": "0" - } - } - } - ] - } - } - ] - } - } - ] - } - } - ] - } - }, "facets_filters": {} } \ No newline at end of file diff --git a/tests/unit/data/empty_query_with_sort_by.json b/tests/unit/data/empty_query_with_sort_by.json index 35229b4c..85ba385c 100644 --- a/tests/unit/data/empty_query_with_sort_by.json +++ b/tests/unit/data/empty_query_with_sort_by.json @@ -8,7 +8,5 @@ "size": 
25, "from": 25 }, - "fulltext": null, - "filter_query": null, "facets_filters": null } \ No newline at end of file diff --git a/tests/unit/data/empty_query_with_sort_by_and_facets.json b/tests/unit/data/empty_query_with_sort_by_and_facets.json index b58ca4aa..bd0a44ec 100644 --- a/tests/unit/data/empty_query_with_sort_by_and_facets.json +++ b/tests/unit/data/empty_query_with_sort_by_and_facets.json @@ -3,14 +3,14 @@ "luqum_tree": "None", "es_query": { "aggs": { - "brands_tags": { + "lang": { "terms": { - "field": "brands_tags" + "field": "lang" } }, - "categories_tags": { + "brands": { "terms": { - "field": "categories_tags" + "field": "brands" } }, "nutrition_grades": { @@ -18,9 +18,9 @@ "field": "nutrition_grades" } }, - "lang": { + "categories": { "terms": { - "field": "lang" + "field": "categories" } } }, @@ -30,7 +30,5 @@ "size": 25, "from": 25 }, - "fulltext": null, - "filter_query": null, "facets_filters": null } \ No newline at end of file diff --git a/tests/unit/data/non_existing_filter_field.json b/tests/unit/data/non_existing_filter_field.json index 69db7ede..9b1d71e7 100644 --- a/tests/unit/data/non_existing_filter_field.json +++ b/tests/unit/data/non_existing_filter_field.json @@ -3,43 +3,18 @@ "luqum_tree": "non_existing_field:value", "es_query": { "query": { - "bool": { - "filter": [ - { - "bool": { - "must": [ - { - "match": { - "non_existing_field": { - "query": "value", - "zero_terms_query": "all" - } - } - } - ] - } - } - ] + "match": { + "non_existing_field": { + "query": "value", + "zero_terms_query": "none" + } } }, "size": 25, "from": 25 }, - "fulltext": "", - "filter_query": { - "bool": { - "must": [ - { - "match": { - "non_existing_field": { - "query": "value", - "zero_terms_query": "all" - } - } - } - ] - } - }, + "fulltext": null, + "filter_query": null, "facets_filters": { "non_existing_field": [ "value" diff --git a/tests/unit/data/non_existing_subfield.json b/tests/unit/data/non_existing_subfield.json new file mode 100644 index 
00000000..f4028cf7 --- /dev/null +++ b/tests/unit/data/non_existing_subfield.json @@ -0,0 +1,36 @@ +{ + "text_query": "Milk AND nutriments:(nonexisting:>=3)", + "luqum_tree": "Milk AND nutriments:(nonexisting:[3 TO *])", + "es_query": { + "query": { + "bool": { + "must": [ + { + "multi_match": { + "query": "Milk", + "zero_terms_query": "all", + "fields": [ + "product_name.en", + "generic_name.en", + "categories.en", + "labels.en", + "brands.en" + ], + "type": "best_fields" + } + }, + { + "range": { + "nutriments.nonexisting": { + "gte": "3" + } + } + } + ] + } + }, + "size": 25, + "from": 25 + }, + "facets_filters": {} +} \ No newline at end of file diff --git a/tests/unit/data/open_range.json b/tests/unit/data/open_range.json new file mode 100644 index 00000000..bb2b6a42 --- /dev/null +++ b/tests/unit/data/open_range.json @@ -0,0 +1,42 @@ +{ + "text_query": "(unique_scans_n:>2 AND unique_scans_n:<3) OR unique_scans_n:>=10", + "luqum_tree": "(unique_scans_n:{2 TO *]AND unique_scans_n:[* TO 3}) OR unique_scans_n:[10 TO *]", + "es_query": { + "query": { + "bool": { + "should": [ + { + "bool": { + "must": [ + { + "range": { + "unique_scans_n": { + "gt": "2" + } + } + }, + { + "range": { + "unique_scans_n": { + "lt": "3" + } + } + } + ] + } + }, + { + "range": { + "unique_scans_n": { + "gte": "10" + } + } + } + ] + } + }, + "size": 25, + "from": 25 + }, + "facets_filters": {} +} \ No newline at end of file diff --git a/tests/unit/data/openfoodfacts_config.yml b/tests/unit/data/openfoodfacts_config.yml index 6981acb1..a12d3b73 100644 --- a/tests/unit/data/openfoodfacts_config.yml +++ b/tests/unit/data/openfoodfacts_config.yml @@ -44,19 +44,20 @@ indices: categories: full_text_search: true input_field: categories_tags - taxonomy_name: category + taxonomy_name: categories type: taxonomy + bucket_agg: true labels: full_text_search: true input_field: labels_tags - taxonomy_name: label + taxonomy_name: labels type: taxonomy + bucket_agg: true brands: full_text_search: true - 
split: true - type: text - brands_tags: - type: keyword + input_field: brands_tags + type: taxonomy + taxonomy_name: brands bucket_agg: true stores: split: true @@ -74,27 +75,22 @@ bucket_agg: true quantity: type: text - categories_tags: - type: keyword - taxonomy_name: category - bucket_agg: true - labels_tags: - type: keyword - taxonomy_name: label - bucket_agg: true - countries_tags: - type: keyword + countries: + type: taxonomy + input_field: countries_tags bucket_agg: true - taxonomy_name: country - states_tags: - type: keyword + taxonomy_name: countries + states: + type: taxonomy + input_field: states_tags bucket_agg: true - taxonomy_name: state + taxonomy_name: states origins_tags: type: keyword - ingredients_tags: - type: keyword - taxonomy_name: ingredient + ingredients: + type: taxonomy + input_field: ingredients_tags + taxonomy_name: ingredients unique_scans_n: type: integer scans_n: @@ -116,9 +112,11 @@ type: disabled additives_n: type: integer - allergens_tags: - type: keyword - taxonomy_name: allergen + allergens: + type: taxonomy + input_field: allergens_tags + taxonomy_name: allergens + bucket_agg: true ecoscore_data: type: disabled ecoscore_score: @@ -139,6 +137,7 @@ type: disabled nutriscore_grade: type: keyword + bucket_agg: true traces_tags: type: keyword unknown_ingredients_n: @@ -158,76 +157,73 @@ result_processor: app.openfoodfacts.ResultProcessor split_separator: ',' redis_stream_name: product_updates_off + primary_color: "#341100" + accent_color: "#ff8714" taxonomy: sources: - - name: category + - name: categories url: https://static.openfoodfacts.org/data/taxonomies/categories.full.json - - name: label + - name: labels url: https://static.openfoodfacts.org/data/taxonomies/labels.full.json - - name: additive + - name: additives url: https://static.openfoodfacts.org/data/taxonomies/additives.full.json - - name: allergen + - name: allergens url: 
https://static.openfoodfacts.org/data/taxonomies/allergens.full.json - - name: amino_acid + - name: amino_acids url: https://static.openfoodfacts.org/data/taxonomies/amino_acids.full.json - - name: country + - name: countries url: https://static.openfoodfacts.org/data/taxonomies/countries.full.json - name: data_quality url: https://static.openfoodfacts.org/data/taxonomies/data_quality.full.json - - name: food_group + - name: food_groups url: https://static.openfoodfacts.org/data/taxonomies/food_groups.full.json - - name: improvement + - name: improvements url: https://static.openfoodfacts.org/data/taxonomies/improvements.full.json - - name: ingredient + - name: ingredients url: https://static.openfoodfacts.org/data/taxonomies/ingredients.full.json - name: ingredients_analysis url: https://static.openfoodfacts.org/data/taxonomies/ingredients_analysis.full.json - name: ingredients_processing url: https://static.openfoodfacts.org/data/taxonomies/ingredients_processing.full.json - - name: language + - name: languages url: https://static.openfoodfacts.org/data/taxonomies/languages.full.json - - name: mineral + - name: minerals url: https://static.openfoodfacts.org/data/taxonomies/minerals.full.json - name: misc url: https://static.openfoodfacts.org/data/taxonomies/misc.full.json - - name: nova_group + - name: nova_groups url: https://static.openfoodfacts.org/data/taxonomies/nova_groups.full.json - - name: nucleotide + - name: nucleotides url: https://static.openfoodfacts.org/data/taxonomies/nucleotides.full.json - - name: nutrient + - name: nutrients url: https://static.openfoodfacts.org/data/taxonomies/nutrients.full.json - - name: origin + - name: origins url: https://static.openfoodfacts.org/data/taxonomies/origins.full.json - - name: other_nutritional_substance + - name: other_nutritional_substances url: https://static.openfoodfacts.org/data/taxonomies/other_nutritional_substances.full.json - - name: packaging_material + - name: packaging_materials url: 
https://static.openfoodfacts.org/data/taxonomies/packaging_materials.full.json - name: packaging_recycling url: https://static.openfoodfacts.org/data/taxonomies/packaging_recycling.full.json - - name: packaging_shape + - name: packaging_shapes url: https://static.openfoodfacts.org/data/taxonomies/packaging_shapes.full.json - name: periods_after_opening url: https://static.openfoodfacts.org/data/taxonomies/periods_after_opening.full.json - name: preservation url: https://static.openfoodfacts.org/data/taxonomies/preservation.full.json - - name: state + - name: states url: https://static.openfoodfacts.org/data/taxonomies/states.full.json - - name: vitamin + - name: vitamins url: https://static.openfoodfacts.org/data/taxonomies/vitamins.full.json - - name: brand + - name: brands url: https://static.openfoodfacts.org/data/taxonomies/brands.full.json - exported_langs: - - en - - fr - - es - - de - - it - - nl index: number_of_replicas: 1 number_of_shards: 4 name: off_taxonomy supported_langs: + # a specific language to put the main language entry + - main - aa - ab - ae @@ -383,7 +379,6 @@ indices: - wa - wo - xh - - xx - yi - yo - zh diff --git a/tests/unit/data/simple_filter_query.json b/tests/unit/data/simple_filter_query.json index 0ffc4b1a..6bbac60f 100644 --- a/tests/unit/data/simple_filter_query.json +++ b/tests/unit/data/simple_filter_query.json @@ -1,45 +1,19 @@ { - "text_query": "countries_tags:\"en:italy\"", - "luqum_tree": "countries_tags:\"en:italy\"", + "text_query": "countries:\"en:italy\"", + "luqum_tree": "countries.en:\"en:italy\"", "es_query": { "query": { - "bool": { - "filter": [ - { - "bool": { - "must": [ - { - "term": { - "countries_tags": { - "value": "en:italy" - } - } - } - ] - } - } - ] + "match_phrase": { + "countries.en": { + "query": "en:italy" + } } }, "size": 25, "from": 25 }, - "fulltext": "", - "filter_query": { - "bool": { - "must": [ - { - "term": { - "countries_tags": { - "value": "en:italy" - } - } - } - ] - } - }, "facets_filters": 
{ - "countries_tags": [ + "countries": [ "en:italy" ] } diff --git a/tests/unit/data/simple_full_text_query.json b/tests/unit/data/simple_full_text_query.json index 1ea5099b..82a97900 100644 --- a/tests/unit/data/simple_full_text_query.json +++ b/tests/unit/data/simple_full_text_query.json @@ -1,59 +1,61 @@ { "text_query": "flocons d'avoine", - "luqum_tree": "flocons d'avoine", + "luqum_tree": "((flocons AND d'avoine) OR \"flocons d'avoine\"^2)", "es_query": { "query": { "bool": { - "should": [ + "must": [ { - "match_phrase": { - "product_name.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "generic_name.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "categories.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "labels.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "brands": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "multi_match": { - "query": "flocons d'avoine", - "fields": [ - "product_name.fr", - "generic_name.fr", - "categories.fr", - "labels.fr", - "brands" + "bool": { + "should": [ + { + "bool": { + "must": [ + { + "multi_match": { + "query": "flocons", + "zero_terms_query": "all", + "fields": [ + "product_name.fr", + "generic_name.fr", + "categories.fr", + "labels.fr", + "brands.fr" + ], + "type": "best_fields" + } + }, + { + "multi_match": { + "query": "d'avoine", + "zero_terms_query": "all", + "fields": [ + "product_name.fr", + "generic_name.fr", + "categories.fr", + "labels.fr", + "brands.fr" + ], + "type": "best_fields" + } + } + ] + } + }, + { + "multi_match": { + "boost": 2.0, + "query": "flocons d'avoine", + "fields": [ + "product_name.fr", + "generic_name.fr", + "categories.fr", + "labels.fr", + "brands.fr" + ], + "type": "phrase" + } + } ] } } @@ -63,7 +65,5 @@ "size": 10, "from": 0 }, - "fulltext": "flocons d'avoine", - "filter_query": null, 
"facets_filters": {} } \ No newline at end of file diff --git a/tests/unit/data/simple_full_text_query_facets.json b/tests/unit/data/simple_full_text_query_facets.json index 4b0e5c12..3d6775f2 100644 --- a/tests/unit/data/simple_full_text_query_facets.json +++ b/tests/unit/data/simple_full_text_query_facets.json @@ -1,59 +1,61 @@ { "text_query": "flocons d'avoine", - "luqum_tree": "flocons d'avoine", + "luqum_tree": "((flocons AND d'avoine) OR \"flocons d'avoine\"^2)", "es_query": { "query": { "bool": { - "should": [ + "must": [ { - "match_phrase": { - "product_name.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "generic_name.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "categories.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "labels.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "brands": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "multi_match": { - "query": "flocons d'avoine", - "fields": [ - "product_name.fr", - "generic_name.fr", - "categories.fr", - "labels.fr", - "brands" + "bool": { + "should": [ + { + "bool": { + "must": [ + { + "multi_match": { + "query": "flocons", + "zero_terms_query": "all", + "fields": [ + "product_name.fr", + "generic_name.fr", + "categories.fr", + "labels.fr", + "brands.fr" + ], + "type": "best_fields" + } + }, + { + "multi_match": { + "query": "d'avoine", + "zero_terms_query": "all", + "fields": [ + "product_name.fr", + "generic_name.fr", + "categories.fr", + "labels.fr", + "brands.fr" + ], + "type": "best_fields" + } + } + ] + } + }, + { + "multi_match": { + "boost": 2.0, + "query": "flocons d'avoine", + "fields": [ + "product_name.fr", + "generic_name.fr", + "categories.fr", + "labels.fr", + "brands.fr" + ], + "type": "phrase" + } + } ] } } @@ -61,14 +63,9 @@ } }, "aggs": { - "brands_tags": { - "terms": { - "field": 
"brands_tags" - } - }, - "labels_tags": { + "brands": { "terms": { - "field": "labels_tags" + "field": "brands" } }, "nutrition_grades": { @@ -80,12 +77,15 @@ "terms": { "field": "owner" } + }, + "labels": { + "terms": { + "field": "labels" + } } }, "size": 10, "from": 0 }, - "fulltext": "flocons d'avoine", - "filter_query": null, "facets_filters": {} } \ No newline at end of file diff --git a/tests/unit/data/sort_by_query.json b/tests/unit/data/sort_by_query.json index d0c686fc..087f8604 100644 --- a/tests/unit/data/sort_by_query.json +++ b/tests/unit/data/sort_by_query.json @@ -1,60 +1,36 @@ { "text_query": "flocons d'avoine", - "luqum_tree": "flocons d'avoine", + "luqum_tree": "flocons AND d'avoine", "es_query": { "query": { "bool": { - "should": [ + "must": [ { - "match_phrase": { - "product_name.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "generic_name.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "categories.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "labels.fr": { - "query": "flocons d'avoine", - "boost": 2.0 - } - } - }, - { - "match_phrase": { - "brands": { - "query": "flocons d'avoine", - "boost": 2.0 - } + "multi_match": { + "query": "flocons", + "zero_terms_query": "all", + "fields": [ + "product_name.fr", + "generic_name.fr", + "categories.fr", + "labels.fr", + "brands.fr" + ], + "type": "best_fields" } }, { "multi_match": { - "query": "flocons d'avoine", + "query": "d'avoine", + "zero_terms_query": "all", "fields": [ "product_name.fr", "generic_name.fr", "categories.fr", "labels.fr", - "brands" - ] + "brands.fr" + ], + "type": "best_fields" } } ] @@ -70,7 +46,5 @@ "size": 10, "from": 0 }, - "fulltext": "flocons d'avoine", - "filter_query": null, "facets_filters": {} } \ No newline at end of file diff --git a/tests/unit/data/test_open_range.json b/tests/unit/data/test_open_range.json new file mode 100644 index 
00000000..bb2b6a42 --- /dev/null +++ b/tests/unit/data/test_open_range.json @@ -0,0 +1,42 @@ +{ + "text_query": "(unique_scans_n:>2 AND unique_scans_n:<3) OR unique_scans_n:>=10", + "luqum_tree": "(unique_scans_n:{2 TO *]AND unique_scans_n:[* TO 3}) OR unique_scans_n:[10 TO *]", + "es_query": { + "query": { + "bool": { + "should": [ + { + "bool": { + "must": [ + { + "range": { + "unique_scans_n": { + "gt": "2" + } + } + }, + { + "range": { + "unique_scans_n": { + "lt": "3" + } + } + } + ] + } + }, + { + "range": { + "unique_scans_n": { + "gte": "10" + } + } + } + ] + } + }, + "size": 25, + "from": 25 + }, + "facets_filters": {} +} \ No newline at end of file diff --git a/tests/unit/data/wildcard_in_phrase_is_legit.json b/tests/unit/data/wildcard_in_phrase_is_legit.json new file mode 100644 index 00000000..dd18f8a6 --- /dev/null +++ b/tests/unit/data/wildcard_in_phrase_is_legit.json @@ -0,0 +1,53 @@ +{ + "text_query": "Milk AND \"*\" AND categories:\"*\"", + "luqum_tree": "Milk AND \"*\" AND categories.en:\"*\"", + "es_query": { + "query": { + "bool": { + "must": [ + { + "multi_match": { + "query": "Milk", + "zero_terms_query": "all", + "fields": [ + "product_name.en", + "generic_name.en", + "categories.en", + "labels.en", + "brands.en" + ], + "type": "best_fields" + } + }, + { + "multi_match": { + "query": "*", + "fields": [ + "product_name.en", + "generic_name.en", + "categories.en", + "labels.en", + "brands.en" + ], + "type": "phrase" + } + }, + { + "match_phrase": { + "categories.en": { + "query": "*" + } + } + } + ] + } + }, + "size": 25, + "from": 25 + }, + "facets_filters": { + "categories": [ + "*" + ] + } +} \ No newline at end of file diff --git a/tests/unit/test__import.py b/tests/unit/test__import.py index bb5daee2..93d4bab9 100644 --- a/tests/unit/test__import.py +++ b/tests/unit/test__import.py @@ -325,20 +325,8 @@ def test_gen_documents(default_config): assert isinstance(last_indexed_datetime, str) assert 
datetime.datetime.fromisoformat(last_indexed_datetime) > start_datetime assert "categories" in document["_source"] - categories = { - lang: set(names) - for lang, names in document["_source"].pop("categories").items() - } - assert categories == { - "de": {"Getränke"}, - "en": {"Beverages", "Drinks"}, - "es": {"Bebidas"}, - "fr": {"Boissons"}, - "it": {"Bevande"}, - "nl": {"Drank", "Dranken"}, - } assert document["_source"] == { - "categories_tags": ["en:beverages"], + "categories": ["en:beverages"], "code": ids[i], } diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 07436d23..f6bfcfc5 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -1,3 +1,5 @@ +import pytest + import app.config BASE_CONFIG = """ @@ -38,6 +40,7 @@ document_fetcher: app._import.BaseDocumentFetcher default_index: test """ + AGGS_FIELDS = """ other_agg: type: keyword @@ -50,6 +53,13 @@ bucket_agg: true """ +RESERVED_FIELDS = """ + last_indexed_datetime: + type: date + _id: + type: keyword +""" + def _config_with_aggs(tmpdir, facets=""): my_config = tmpdir / "config.yaml" @@ -64,3 +74,14 @@ def test_loading_config(tmpdir): # just test it loads for now app.config.Config.from_yaml(conf_file) # TODO add asserts on config + + +def test_reserved_field_names(tmpdir): + """Test we can't use a reserved field name""" + my_config = tmpdir / "config.yaml" + conf_content = BASE_CONFIG.replace(" # more fields\n", RESERVED_FIELDS) + open(my_config, "w").write(conf_content) + with pytest.raises(ValueError) as excinfo: + app.config.Config.from_yaml(my_config) + assert "last_indexed_datetime" in str(excinfo.value) + assert "_id" in str(excinfo.value) diff --git a/tests/unit/test_indexing.py b/tests/unit/test_indexing.py index 31032f13..3c12e24e 100644 --- a/tests/unit/test_indexing.py +++ b/tests/unit/test_indexing.py @@ -31,7 +31,6 @@ "pt-BR": "pt-BR", "pt": "pt-PT", "main": "MAIN", - "other": ["VN", "ID"], }, ), # Same, but without main language @@ -49,17 +48,15 @@ def 
test_process_text_lang_field(data, input_field, split, expected): lang_separator = "_" split_separator = "," supported_langs = {"fr", "it", "pt-BR", "pt"} - assert ( - process_text_lang_field( - data=data, - input_field=input_field, - split=split, - lang_separator=lang_separator, - split_separator=split_separator, - supported_langs=supported_langs, - ) - == expected + result = process_text_lang_field( + data=data, + input_field=input_field, + split=split, + lang_separator=lang_separator, + split_separator=split_separator, + supported_langs=supported_langs, ) + assert result == expected taxonomy_config = TaxonomyConfig( @@ -75,7 +72,7 @@ def test_process_text_lang_field(data, input_field, split, expected): @pytest.mark.parametrize( - "data, field, taxonomy_config, taxonomy_langs, expected", + "data, field, taxonomy_config, expected", [ ( { @@ -90,60 +87,15 @@ def test_process_text_lang_field(data, input_field, split, expected): name="categories", input_field="categories_tags", split=True, - add_taxonomy_synonyms=True, - taxonomy_name="category", - ), - taxonomy_config, - {"en"}, - { - "fr": [ - "Boissons", - "alcool", - "alcools", - "boisson alcoolisée", - "Boissons alcoolisées", - "Edamame", - ], - "it": ["Bevande", "Bevande alcoliche", "Edamame"], - "en": [ - "Drinks", - "Beverages", - "Alcoholic beverages", - "drinks with alcohol", - "alcohols", - "Alcoholic drinks", - "Edamame", - ], - "original": "Boissons,Boissons alcoolisées,Edamame", - }, - ), - # Same, but without synonyms - ( - { - "taxonomy_langs": ["fr", "it"], - "categories_tags": "en:beverages,en:alcoholic-beverages", - }, - FieldConfig( - type=FieldType.taxonomy, - name="categories", - input_field="categories_tags", - split=True, - add_taxonomy_synonyms=False, taxonomy_name="category", ), taxonomy_config, - {"en"}, - { - "fr": [ - "Boissons", - "Boissons alcoolisées", - ], - "it": ["Bevande", "Bevande alcoliche"], - "en": [ - "Beverages", - "Alcoholic beverages", - ], - }, + [ + "en:beverages", + 
"en:alcoholic-beverages", + "en:not-in-taxonomy", + "en:edamame", + ], ), # The field is missing here, we should return None ( @@ -153,28 +105,23 @@ def test_process_text_lang_field(data, input_field, split, expected): name="categories", input_field="categories_tags", split=True, - add_taxonomy_synonyms=False, taxonomy_name="category", ), taxonomy_config, - {"en"}, None, ), ], ) -def test_process_taxonomy_field(data, field, taxonomy_config, taxonomy_langs, expected): +def test_process_taxonomy_field(data, field, taxonomy_config, expected): split_separator = "," output = process_taxonomy_field( data=data, field=field, taxonomy_config=taxonomy_config, split_separator=split_separator, - taxonomy_langs=taxonomy_langs, ) if expected is None: assert output is None else: - assert set(output.keys()) == set(expected.keys()) - for key in expected.keys(): - assert set(output[key]) == set(expected[key]) + assert set(output) == set(expected) diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py index 217520ee..efc7cd4b 100644 --- a/tests/unit/test_query.py +++ b/tests/unit/test_query.py @@ -1,12 +1,15 @@ from pathlib import Path +from typing import Any import orjson import pytest -from luqum.elasticsearch import ElasticsearchQueryBuilder +from luqum.parser import parser -from app._types import JSONType, SearchParameters +from app._types import QueryAnalysis, SearchParameters from app.config import IndexConfig -from app.query import build_search_query, decompose_query, parse_query +from app.es_query_builder import FullTextQueryBuilder +from app.exceptions import QueryAnalysisError +from app.query import boost_phrases, build_search_query, resolve_unknown_operation from app.utils.io import dump_json, load_json DATA_DIR = Path(__file__).parent / "data" @@ -16,88 +19,60 @@ def load_elasticsearch_query_result(id_: str): return load_json(DATA_DIR / f"{id_}.json") +def test_boost_phrases_none(): + # luqum_tree is None + analysis = QueryAnalysis() + analysis = 
boost_phrases(analysis, 3.0, 2) + assert analysis.luqum_tree is None + + +@pytest.mark.parametrize( - "q,expected_filter_query,expected_fulltext", + "query,proximity,expected", [ - # single term + ("Milk", 3, "Milk"), + ("Whole Milk", None, '((Whole AND Milk) OR "Whole Milk"^2.1)'), + ("Whole Milk", 3, '((Whole AND Milk) OR "Whole Milk"~3^2.1)'), + ("Whole AND Milk", 3, '((Whole AND Milk) OR "Whole Milk"~3^2.1)'), ( - "word", - None, - "word", - ), - ( - 'word1 (states_tags:"en:france" OR states_tags:"en:germany") word2 labels_tags:"en:organic" word3', - { - "bool": { - "must": [ - { - "bool": { - "should": [ - {"term": {"states_tags": {"value": "en:france"}}}, - {"term": {"states_tags": {"value": "en:germany"}}}, - ] - } - }, - {"term": {"labels_tags": {"value": "en:organic"}}}, - ] - }, - }, - "word1 word2 word3", - ), - # only non-filter keywords - ( - "word1 word2", - None, - "word1 word2", + "Whole Milk Cream", + 3, + '((Whole AND Milk AND Cream) OR "Whole Milk Cream"~3^2.1)', ), + # no boost on OR + ("Whole OR Milk", 3, "Whole OR Milk"), + # AND inside search field expressions + ( - 'states_tags:"en:spain"', - {"bool": {"must": [{"term": {"states_tags": {"value": "en:spain"}}}]}}, - "", + "Cream AND labels:(Vegan AND Fair-trade)", + 3, + "Cream AND labels:(Vegan AND Fair-trade)", ), + # mix things ( - "nutriments.salt_100g:[2 TO *]", - {"bool": {"must": [{"range": {"nutriments.salt_100g": {"gte": "2"}}}]}}, - "", + 'Whole Milk "No gluten" Vegetarian Soup', + 3, + '((Whole AND Milk) OR "Whole Milk"~3^2.1) AND "No gluten" AND ((Vegetarian AND Soup) OR "Vegetarian Soup"~3^2.1)', ), + # complex expression ( - "non_existing_field:value", - { - "bool": { - "must": [ - { - "match": { - "non_existing_field": { - "query": "value", - "zero_terms_query": "all", - } - } - } - ] - } - }, - "", + "Cream AND (labels:Vegan OR NOT (Whole AND Milk)^3)", + 3, + 'Cream AND (labels:Vegan OR NOT (((Whole AND Milk) OR "Whole Milk"~3^2.1))^3)', ), ], ) -def test_decompose_query( - q: 
str, expected_filter_query: list[JSONType], expected_fulltext: str -): - query_builder = ElasticsearchQueryBuilder( - default_operator=ElasticsearchQueryBuilder.MUST, - not_analyzed_fields=["states_tags", "labels_tags", "countries_tags"], - object_fields=["nutriments", "nutriments.salt_100g"], - ) - analysis = parse_query(q) - analysis = decompose_query(analysis, filter_query_builder=query_builder) - assert analysis.filter_query == expected_filter_query - assert analysis.fulltext == expected_fulltext +def test_boost_phrases(query: str, proximity: int | None, expected: str): + luqum_tree = parser.parse(query) + analysis = QueryAnalysis(luqum_tree=luqum_tree) + # resolve unknown operation + analysis = resolve_unknown_operation(analysis) + analysis = boost_phrases(analysis, 2.1, proximity) + assert str(analysis.luqum_tree) == expected @pytest.mark.parametrize( - "id_,q,langs,size,page,sort_by,facets", + "id_,q,langs,size,page,sort_by,facets,boost_phrase", [ - ("simple_full_text_query", "flocons d'avoine", {"fr"}, 10, 1, None, None), + ("simple_full_text_query", "flocons d'avoine", {"fr"}, 10, 1, None, None, True), ( "simple_full_text_query_facets", "flocons d'avoine", @@ -105,30 +80,41 @@ def test_decompose_query( 10, 1, None, - ["brands_tags", "labels_tags", "nutrition_grades", "owner"], + ["brands", "labels", "nutrition_grades", "owner"], + True, ), # sort by descending number of scan count - ("sort_by_query", "flocons d'avoine", {"fr"}, 10, 1, "-unique_scans_n", None), + ( + "sort_by_query", + "flocons d'avoine", + {"fr"}, + 10, + 1, + "-unique_scans_n", + None, + True, + ), # we change number of results (25 instead of 10) and request page 2 - ("simple_filter_query", 'countries_tags:"en:italy"', {"en"}, 25, 2, None, None), ( - "complex_query", - 'bacon de boeuf (countries_tags:"en:italy" AND (categories_tags:"en:beef" AND ' - "(nutriments.salt_100g:[2 TO *] OR nutriments.salt_100g:[0 TO 0.05])))", + "simple_filter_query", + 'countries:"en:italy"', {"en"}, 25, 2, 
None, None, + True, ), ( "empty_query_with_sort_by", @@ -138,6 +124,7 @@ 2, "unique_scans_n", None, + True, ), ( "empty_query_with_sort_by_and_facets", @@ -146,11 +133,48 @@ 25, 2, "unique_scans_n", - ["brands_tags", "categories_tags", "nutrition_grades", "lang"], + ["brands", "categories", "nutrition_grades", "lang"], + True, + ), + ( + "open_range", + "(unique_scans_n:>2 AND unique_scans_n:<3) OR unique_scans_n:>=10", + {"en"}, + 25, + 2, + None, + None, + True, + ), + ( + # it should be ok for now, until we implement subfields + "non_existing_subfield", + "Milk AND nutriments:(nonexisting:>=3)", + {"en"}, + 25, + 2, + None, + None, + True, + ), + ( + # * in a phrase is legit, it does not have the wildcard meaning + "wildcard_in_phrase_is_legit", + 'Milk AND "*" AND categories:"*"', + {"en"}, + 25, + 2, + None, + None, + True, ), + # TODO + # - test scripts sorting + # - test ranges and open ranges ], ) def test_build_search_query( + # parameters id_: str, q: str, langs: set[str], @@ -158,20 +182,24 @@ page: int, sort_by: str | None, facets: list[str] | None, + boost_phrase: bool, + # fixtures update_results: bool, default_config: IndexConfig, - default_filter_query_builder: ElasticsearchQueryBuilder, + default_filter_query_builder: FullTextQueryBuilder, ): + params = SearchParameters( + q=q, + langs=langs, + page_size=size, + page=page, + sort_by=sort_by, + facets=facets, + boost_phrase=boost_phrase, + ) query = build_search_query( - SearchParameters( - q=q, - langs=langs, - page_size=size, - page=page, - sort_by=sort_by, - facets=facets, - ), - filter_query_builder=default_filter_query_builder, + params, + 
es_query_builder=default_filter_query_builder, ) if update_results: @@ -181,3 +209,54 @@ def test_build_search_query( expected_result = load_elasticsearch_query_result(id_) assert query._dict_dump() == expected_result + + +@pytest.mark.parametrize( + "specific_params, error_msg", + [ + # non existing field + ({"q": "nonexisting:Milk"}, "field 'nonexisting' not found in index config"), + # non existing field inside more complex request + ( + {"q": "Milk AND (categories:en:Whole OR (nonexisting:Whole)^2)"}, + "field 'nonexisting' not found in index config", + ), + # wildcard alone + ( + {"q": "Milk OR (Cream AND *)"}, + "Free wildcards are not allowed in full text queries", + ), + # unparsable request + # missing closing bracket or parenthesis + ({"q": "completeness:[2 TO 22"}, "Request could not be analyzed by luqum"), + ({"q": "(Milk OR Cream"}, "Request could not be analyzed by luqum"), + # And and OR on same level + ( + {"q": "Milk OR Cream AND Coffee"}, + "Request could not be transformed by luqum", + ), + ], +) +def test_build_search_query_failure( + specific_params: dict[str, Any], + error_msg: str, + default_config: IndexConfig, + default_filter_query_builder: FullTextQueryBuilder, +): + # base search params + params = { + "q": "Milk", + "langs": ["fr", "en"], + "page_size": 5, + "page": 1, + "sort_by": None, + "facets": None, + "boost_phrase": True, + } + params.update(specific_params) + with pytest.raises((QueryAnalysisError, ValueError)) as exc_info: + build_search_query( + SearchParameters(**params), + es_query_builder=default_filter_query_builder, + ) + assert error_msg in str(exc_info.value)