From e09e36d6f32e6601f266366c5d27fbba8786d45f Mon Sep 17 00:00:00 2001 From: David Pilato Date: Mon, 6 Jan 2025 16:03:29 +0100 Subject: [PATCH] Add support for semantic search This is an OOTB behaviour which detects if the cluster is capable of semantic search. This implies having a 8.17+ version and a trial or enterprise license. It does not work (yet) with a basic license. We also update docker specs as we need more memory to run ML jobs. --- .../docker-compose-example-elasticsearch/.env | 9 +- .../docker-compose.yml | 1 - contrib/docker-compose-it/.env | 5 +- .../docker-compose-example-elasticsearch/.env | 9 +- .../src/main/resources/docker-compose-it/.env | 5 +- docs/source/admin/fs/elasticsearch.rst | 88 ++++++++++- docs/source/admin/fs/index.rst | 6 +- docs/source/dev/build.rst | 13 +- docs/source/release/2.10.rst | 14 +- .../crawler/fs/client/ESSemanticQuery.java | 33 +++++ .../fs/client/ElasticsearchClient.java | 98 ++++++++++++- .../fs/client/IElasticsearchClient.java | 11 ++ .../fscrawler_mapping_content_semantic.json | 15 ++ .../fscrawler_mapping_content_vector.json | 14 ++ .../fscrawler_docs_semantic.json | 20 +++ .../fscrawler_docs_vector.json | 20 +++ .../fs/client/ElasticsearchClientIT.java | 137 ++++++++++++++++-- .../fs/client/TestContainerHelper.java | 6 +- .../crawler/fs/framework/FsCrawlerUtil.java | 4 +- .../fs/framework/FsCrawlerUtilTest.java | 6 +- .../fs/test/integration/AbstractITCase.java | 42 +++--- .../test/integration/TestContainerHelper.java | 2 +- .../FsCrawlerTestSemanticIT.java | 62 ++++++++ .../samples/test_semantic/3547447.pdf | Bin 0 -> 21607 bytes .../samples/test_semantic/83816738.pdf | Bin 0 -> 29300 bytes .../samples/test_semantic/README.txt | 2 + .../crawler/fs/settings/Elasticsearch.java | 20 ++- 27 files changed, 573 insertions(+), 69 deletions(-) create mode 100644 elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/ESSemanticQuery.java create mode 100644 
elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_component_templates/fscrawler_mapping_content_semantic.json create mode 100644 elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_component_templates/fscrawler_mapping_content_vector.json create mode 100644 elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_index_templates/fscrawler_docs_semantic.json create mode 100644 elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_index_templates/fscrawler_docs_vector.json create mode 100644 integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/elasticsearch/FsCrawlerTestSemanticIT.java create mode 100644 integration-tests/src/test/resources-binary/samples/test_semantic/3547447.pdf create mode 100644 integration-tests/src/test/resources-binary/samples/test_semantic/83816738.pdf create mode 100644 integration-tests/src/test/resources-binary/samples/test_semantic/README.txt diff --git a/contrib/docker-compose-example-elasticsearch/.env b/contrib/docker-compose-example-elasticsearch/.env index 0ee0d2b94..164789653 100644 --- a/contrib/docker-compose-example-elasticsearch/.env +++ b/contrib/docker-compose-example-elasticsearch/.env @@ -26,12 +26,11 @@ ES_PORT=9200 # Port to expose Kibana to the host KIBANA_PORT=5601 -# Enterprise Search settings -ENTERPRISE_SEARCH_PORT=3002 -ENCRYPTION_KEYS=q3t6w9z$C&F)J@McQfTjWnZr4u7x!A%D - # Increase or decrease based on the available host memory (in bytes) -MEM_LIMIT=1073741824 +# When using basic, that should be enough as we don't run ML jobs +# MEM_LIMIT=1073741824 +# When using trial, you need 4gb to be able to run inference with Elasticsearch +MEM_LIMIT=4294967296 # Project namespace (defaults to the current folder name if not set) COMPOSE_PROJECT_NAME=fscrawler diff --git a/contrib/docker-compose-example-elasticsearch/docker-compose.yml 
b/contrib/docker-compose-example-elasticsearch/docker-compose.yml index ec29bb4a9..dfc7ed779 100644 --- a/contrib/docker-compose-example-elasticsearch/docker-compose.yml +++ b/contrib/docker-compose-example-elasticsearch/docker-compose.yml @@ -110,7 +110,6 @@ services: - ELASTICSEARCH_USERNAME=kibana_system - ELASTICSEARCH_PASSWORD=${KIBANA_PASSWORD} - ELASTICSEARCH_SSL_CERTIFICATEAUTHORITIES=config/certs/ca/ca.crt - - ENTERPRISESEARCH_HOST=http://enterprisesearch:${ENTERPRISE_SEARCH_PORT} mem_limit: ${MEM_LIMIT} healthcheck: test: diff --git a/contrib/docker-compose-it/.env b/contrib/docker-compose-it/.env index 8ccdd4814..facb6768e 100644 --- a/contrib/docker-compose-it/.env +++ b/contrib/docker-compose-it/.env @@ -23,7 +23,10 @@ ES_PORT=9200 KIBANA_PORT=5601 # Increase or decrease based on the available host memory (in bytes) -MEM_LIMIT=1073741824 +# When using basic, that should be enough as we don't run ML jobs +# MEM_LIMIT=1073741824 +# When using trial, you need 4gb to be able to run inference with Elasticsearch +MEM_LIMIT=4294967296 # Project namespace (defaults to the current folder name if not set) COMPOSE_PROJECT_NAME=fscrawler diff --git a/contrib/src/main/resources/docker-compose-example-elasticsearch/.env b/contrib/src/main/resources/docker-compose-example-elasticsearch/.env index 066f8c85a..377608caa 100644 --- a/contrib/src/main/resources/docker-compose-example-elasticsearch/.env +++ b/contrib/src/main/resources/docker-compose-example-elasticsearch/.env @@ -26,12 +26,11 @@ ES_PORT=9200 # Port to expose Kibana to the host KIBANA_PORT=5601 -# Enterprise Search settings -ENTERPRISE_SEARCH_PORT=3002 -ENCRYPTION_KEYS=q3t6w9z$C&F)J@McQfTjWnZr4u7x!A%D - # Increase or decrease based on the available host memory (in bytes) -MEM_LIMIT=1073741824 +# When using basic, that should be enough as we don't run ML jobs +# MEM_LIMIT=1073741824 +# When using trial, you need 4gb to be able to run inference with Elasticsearch +MEM_LIMIT=4294967296 # Project namespace 
(defaults to the current folder name if not set) COMPOSE_PROJECT_NAME=fscrawler diff --git a/contrib/src/main/resources/docker-compose-it/.env b/contrib/src/main/resources/docker-compose-it/.env index 2402e6406..b6a835898 100644 --- a/contrib/src/main/resources/docker-compose-it/.env +++ b/contrib/src/main/resources/docker-compose-it/.env @@ -23,7 +23,10 @@ ES_PORT=9200 KIBANA_PORT=5601 # Increase or decrease based on the available host memory (in bytes) -MEM_LIMIT=1073741824 +# When using basic, that should be enough as we don't run ML jobs +# MEM_LIMIT=1073741824 +# When using trial, you need 4gb to be able to run inference with Elasticsearch +MEM_LIMIT=4294967296 # Project namespace (defaults to the current folder name if not set) COMPOSE_PROJECT_NAME=fscrawler diff --git a/docs/source/admin/fs/elasticsearch.rst b/docs/source/admin/fs/elasticsearch.rst index acfda88f3..70a67a5b3 100644 --- a/docs/source/admin/fs/elasticsearch.rst +++ b/docs/source/admin/fs/elasticsearch.rst @@ -24,6 +24,8 @@ Here is a list of Elasticsearch settings (under ``elasticsearch.`` prefix)`: +-----------------------------------+---------------------------+---------------------------------+ | ``elasticsearch.pipeline`` | ``null`` | :ref:`ingest_node` | +-----------------------------------+---------------------------+---------------------------------+ +| ``elasticsearch.semantic_search`` | ``true`` | :ref:`semantic_search` | ++-----------------------------------+---------------------------+---------------------------------+ | ``elasticsearch.nodes`` | ``https://127.0.0.1:9200``| `Node settings`_ | +-----------------------------------+---------------------------+---------------------------------+ | ``elasticsearch.path_prefix`` | ``null`` | `Path prefix`_ | @@ -87,7 +89,10 @@ to define the index settings and mappings: and the mapping for the ``path`` field. - ``fscrawler_mapping_attachment``: defines the mapping for the ``attachment`` field. 
-- ``fscrawler_mapping_content``: defines the mapping for the ``content`` field. +- ``fscrawler_mapping_content_semantic``: defines the mapping for the ``content`` field when using semantic search. +It also creates a ``semantic_text`` field named ``content_semantic``. Please read the :ref:`semantic_search` section. + +- ``fscrawler_mapping_content``: defines the mapping for the ``content`` field when semantic search is not available. - ``fscrawler_mapping_meta``: defines the mapping for the ``meta`` field. You can see the content of those templates by running: @@ -117,6 +122,29 @@ If you want to define your own index settings and mapping to set analyzers for example, you can update the needed component template **before starting the FSCrawler**. +The following example uses a ``french`` analyzer to index the +``content`` field and still allow using semantic search. + +.. code:: json + + PUT _component_template/fscrawler_mapping_content_semantic + { + "template": { + "mappings": { + "properties": { + "content": { + "type": "text", + "analyzer": "french", + "copy_to": "content_semantic" + }, + "content_semantic": { + "type": "semantic_text" + } + } + } + } + } + The following example uses a ``french`` analyzer to index the ``content`` field. @@ -148,6 +176,58 @@ You might to try `elasticsearch Reindex API `__ though. +.. _semantic_search: + +Semantic search +""""""""""""""" + +.. versionadded:: 2.10 + +FSCrawler can use `semantic search `__ +to improve the search results. + +.. note:: + + Semantic search is available starting from Elasticsearch 8.17.0 and requires a trial or enterprise license. + +Semantic search is enabled by default when an Elasticsearch 8.17.0 or above and a trial or enterprise license are +detected. But you can disable it by setting ``semantic_search`` to ``false``: + +.. 
code:: yaml + + name: "test" + elasticsearch: + semantic_search: false + +When activated, the ``content`` field is indexed as usual but a new field named ``content_semantic`` +is created and uses the `semantic_text `__ +field type. This field type is used to store the semantic information extracted from the content by using the defined +inference API (defaults to `Elser model `__). + +You can change the model to use by changing the component template. For example, a recommended model when you also have +non-English content is the Elastic `multilingual-e5-small `__: + +.. code:: json + + PUT _component_template/fscrawler_mapping_content_semantic + { + "template": { + "mappings": { + "properties": { + "content": { + "type": "text", + "copy_to": "content_semantic" + }, + "content_semantic": { + "type": "semantic_text", + "inference_id": ".multilingual-e5-small-elasticsearch" + } + } + } + } + } + + Bulk settings ^^^^^^^^^^^^^ @@ -330,8 +410,7 @@ Then you can use the encoded API Key in FSCrawler settings: Basic Authentication (deprecated) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The best practice is to use `API Key`_ or `Access Token`_. But if you have no other choice, -you can still use Basic Authentication. +The best practice is to use `API Key`_. But if you have no other choice, you can still use Basic Authentication. 
You can provide the ``username`` and ``password`` to FSCrawler: @@ -465,6 +544,9 @@ FSCrawler may create the following fields depending on configuration and availab +============================+========================================+==============================================+=====================================================================+ | ``content`` | Extracted content | ``"This is my text!"`` | | +----------------------------+----------------------------------------+----------------------------------------------+---------------------------------------------------------------------+ +| ``content_semantic`` | Semantic version of the extracted | Semantic representation | | +| | content | | | ++----------------------------+----------------------------------------+----------------------------------------------+---------------------------------------------------------------------+ | ``attachment`` | BASE64 encoded binary file | BASE64 Encoded document | | | | | | | +----------------------------+----------------------------------------+----------------------------------------------+---------------------------------------------------------------------+ diff --git a/docs/source/admin/fs/index.rst b/docs/source/admin/fs/index.rst index e639e5a98..c12804ce6 100644 --- a/docs/source/admin/fs/index.rst +++ b/docs/source/admin/fs/index.rst @@ -104,8 +104,12 @@ The job file (``~/.fscrawler/test/_settings.yaml``) for the job name ``test`` mu index: "test_docs" # optional, defaults to "test_folders", used when es.index_folders is set to true index_folder: "test_fold" + # optional, defaults to "true" + push_templates: "true" + # optional, defaults to "true", used with Elasticsearch 8.17+ with a trial or enterprise license + semantic_search: "true" + # only used when started with --rest option rest: - # only is started with --rest option url: "http://127.0.0.1:8080/fscrawler" Here is a list of existing top level settings: diff --git a/docs/source/dev/build.rst 
b/docs/source/dev/build.rst index 0e086de62..4c1678a32 100644 --- a/docs/source/dev/build.rst +++ b/docs/source/dev/build.rst @@ -89,7 +89,7 @@ To run the test suite against an elasticsearch instance running locally, just ru .. hint:: - If you are using a secured instance, use ``tests.cluster.user``, ``tests.cluster.apiKey``:: + If you are using a secured instance, use ``tests.cluster.apiKey``:: mvn verify -pl fr.pilato.elasticsearch.crawler:fscrawler-it \ -Dtests.cluster.apiKey=APIKEYHERE \ @@ -102,6 +102,17 @@ To run the test suite against an elasticsearch instance running locally, just ru -Dtests.cluster.pass=changeme \ -Dtests.cluster.url=https://127.0.0.1:9200 \ + If the cluster is using a self generated SSL certificate, you can bypass checking the certificate by using + ``tests.cluster.check_ssl``:: + + mvn verify -pl fr.pilato.elasticsearch.crawler:fscrawler-it \ + -Dtests.cluster.apiKey=APIKEYHERE \ + -Dtests.cluster.url=https://127.0.0.1:9200 \ + -Dtests.cluster.check_ssl=false + + But anyway, by default, the integration tests will try to run with both options, first checking the ssl certificate, + and then ignoring it. + .. hint:: To run tests against another instance (ie. running on diff --git a/docs/source/release/2.10.rst b/docs/source/release/2.10.rst index 9fe6dbb99..6c26a697e 100644 --- a/docs/source/release/2.10.rst +++ b/docs/source/release/2.10.rst @@ -4,16 +4,18 @@ Version 2.10 New --- +* Add support for automatic semantic search when using a 8.17+ version with a trial or enterprise + license. See :ref:`semantic_search`. Thanks to dadoonet. * Using the REST API ``_document``, you can now fetch a document from the local dir, from an http website -or from an S3 bucket. Thanks to dadoonet. -* You can now remove a document in Elasticsearch using FSCrawler ``_document`` endpoint. Thanks to dadoonet. + or from an S3 bucket. See :ref:`rest-service`. Thanks to dadoonet. 
+* You can now remove a document in Elasticsearch using FSCrawler ``_document`` endpoint. See :ref:`rest-service`. Thanks to dadoonet. * Implement our own HTTP Client for Elasticsearch. Thanks to dadoonet. -* Add option to set path to custom tika config file. Thanks to iadcode. -* Support for Index Templates. Thanks to dadoonet. +* Add option to set path to custom tika config file. See :ref:`local-fs-settings`. Thanks to iadcode. +* Support for Index Templates. See :ref:`mappings`. Thanks to dadoonet. * Support for Aliases. You can now index to an alias. Thanks to dadoonet. -* Support for Access Token and Api Keys instead of Basic Authentication. Thanks to dadoonet. +* Support for Access Token and Api Keys instead of Basic Authentication. See :ref:`credentials`. Thanks to dadoonet. * Allow loading external jars. This adds a new ``external`` directory from where jars can be loaded - to the FSCrawler JVM. For example, you could provide your own Custom Tika Parser code. Thanks to dadoonet. + to the FSCrawler JVM. For example, you could provide your own Custom Tika Parser code. See :ref:`layout`. Thanks to dadoonet. * Add temporal information in folder index. Thanks to bdauvissat Fix diff --git a/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/ESSemanticQuery.java b/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/ESSemanticQuery.java new file mode 100644 index 000000000..0a8114c11 --- /dev/null +++ b/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/ESSemanticQuery.java @@ -0,0 +1,33 @@ +/* + * Licensed to David Pilato under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package fr.pilato.elasticsearch.crawler.fs.client; + +public class ESSemanticQuery extends ESQuery { + private final String value; + + public ESSemanticQuery(String field, String value) { + super(field); + this.value = value; + } + + public String getValue() { + return value; + } +} diff --git a/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/ElasticsearchClient.java b/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/ElasticsearchClient.java index a8d2c5442..1a9091126 100644 --- a/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/ElasticsearchClient.java +++ b/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/ElasticsearchClient.java @@ -90,10 +90,14 @@ public class ElasticsearchClient implements IElasticsearchClient { private final List initialHosts; private String version = null; + private String license = null; private int majorVersion; + private int minorVersion; private int currentNode = -1; private int currentRun = -1; private String authorizationHeader = null; + private boolean semanticSearch; + private boolean vectorSearch = false; public ElasticsearchClient(Path config, FsSettings settings) { this.config = config; @@ -108,6 +112,7 @@ public ElasticsearchClient(Path config, FsSettings settings) { // We have only one node, so we won't have to select a specific one but the only one. 
currentNode = 0; } + semanticSearch = settings.getElasticsearch().isSemanticSearch(); } @Override @@ -153,7 +158,6 @@ public void start() throws ElasticsearchClientException { } } - // If we have an Api Key let's use it. Otherwise, we will use basic auth if (!FsCrawlerUtil.isNullOrEmpty(settings.getElasticsearch().getApiKey())) { authorizationHeader = "ApiKey " + settings.getElasticsearch().getApiKey(); @@ -193,6 +197,29 @@ public void start() throws ElasticsearchClientException { } } + if (semanticSearch) { + // Semantic search needs Elasticsearch 8.17 or later: any later major version (9.x and above) qualifies whatever its minor number + if (majorVersion > 8 || (majorVersion == 8 && minorVersion >= 17)) { + logger.debug("Semantic search is enabled and we are running on a version of Elasticsearch {} " + + "which is 8.17 or higher. We will try to use the semantic search features.", version); + license = getLicense(); + if (!"enterprise".equals(license) && !"trial".equals(license)) { + logger.warn("Semantic search is enabled but we are running Elasticsearch with a {} " + + "license although we need either an enterprise or trial license. " + + "We will not be able to use the semantic search features ATM. We might switch later to " + + "a vector embeddings generation.", license); + semanticSearch = false; + vectorSearch = true; + } else { + logger.debug("Semantic search is enabled"); + } + } else { + logger.warn("Semantic search is enabled but we are running on a version of Elasticsearch {} " + + "which is lower than 8.17. 
We will not be able to use the semantic search features.", version); + semanticSearch = false; + } + } + // Create the BulkProcessor instance bulkProcessor = new FsCrawlerBulkProcessor.Builder<>( new ElasticsearchEngine(this), @@ -240,11 +267,53 @@ public String getVersion() throws ElasticsearchClientException { // Cache the version and the major version version = document.read("$.version.number"); majorVersion = extractMajorVersion(version); + minorVersion = extractMinorVersion(version); logger.debug("get version returns {} and {} as the major version number", version, majorVersion); return version; } + @Override + public String getLicense() throws ElasticsearchClientException { + if (license != null) { + return license; + } + + // License endpoint might not be ready in IT so we retry with exponential wait time up to 1 minute + int retries = 0; + int maxRetries = 5; + int waitTime = 1000; + while (retries < maxRetries) { + try { + return getLicenseInternal(); + } catch (NotFoundException e) { + logger.warn("License endpoint is not ready yet. 
Retrying in {}ms", waitTime); + try { + Thread.sleep(waitTime); + } catch (InterruptedException ex) { + Thread.currentThread().interrupt(); + } + waitTime *= 2; + retries++; + } + } + + throw new ElasticsearchClientException("License endpoint is not ready after " + maxRetries + " retries"); + } + + private String getLicenseInternal() throws ElasticsearchClientException { + logger.debug("get license"); + String response = httpGet("_license"); + + // We parse the response + DocumentContext document = parseJsonAsDocumentContext(response); + // Cache the license level + license = document.read("$.license.type"); + + logger.debug("get license returns {}", license); + return license; + } + @Override public int getMajorVersion() { return majorVersion; @@ -499,13 +568,21 @@ public void createIndexAndComponentTemplates() throws Exception { loadAndPushComponentTemplate(majorVersion, "fscrawler_mapping_file"); loadAndPushComponentTemplate(majorVersion, "fscrawler_mapping_path"); loadAndPushComponentTemplate(majorVersion, "fscrawler_mapping_attachment"); - loadAndPushComponentTemplate(majorVersion, "fscrawler_mapping_content"); + if (semanticSearch) { + loadAndPushComponentTemplate(majorVersion, "fscrawler_mapping_content_semantic"); + } else { + loadAndPushComponentTemplate(majorVersion, "fscrawler_mapping_content"); + } loadAndPushComponentTemplate(majorVersion, "fscrawler_mapping_meta"); logger.debug("Creating/updating index templates"); // If needed, we create the new settings for this files index if (!settings.getFs().isAddAsInnerObject() || (!settings.getFs().isJsonSupport() && !settings.getFs().isXmlSupport())) { - loadAndPushIndexTemplate(majorVersion, "fscrawler_docs", settings.getElasticsearch().getIndex()); + if (semanticSearch) { + loadAndPushIndexTemplate(majorVersion, "fscrawler_docs_semantic", settings.getElasticsearch().getIndex()); + } else { + loadAndPushIndexTemplate(majorVersion, "fscrawler_docs", settings.getElasticsearch().getIndex()); + } } // If needed, 
we create the new settings for this folder index @@ -710,6 +787,10 @@ private String toElasticsearchQuery(ESQuery query) { ESMatchQuery esQuery = (ESMatchQuery) query; return "\"match\": { \"" + esQuery.getField() + "\": \"" + esQuery.getValue() + "\"}"; } + if (query instanceof ESSemanticQuery) { + ESSemanticQuery esQuery = (ESSemanticQuery) query; + return "\"semantic\": { \"field\":\"" + esQuery.getField() + "\", \"query\":\"" + esQuery.getValue() + "\"}"; + } if (query instanceof ESPrefixQuery) { ESPrefixQuery esQuery = (ESPrefixQuery) query; return "\"prefix\": { \"" + esQuery.getField() + "\": \"" + esQuery.getValue() + "\"}"; @@ -835,6 +916,11 @@ public String generateApiKey(String keyName) throws ElasticsearchClientException return encodedApiKey; } + @Override + public boolean isSemanticSupported() { + return semanticSearch; + } + @Deprecated private void createIndex(Path jobMappingDir, int elasticsearchVersion, String indexSettingsFile, String indexName) throws Exception { try { @@ -946,8 +1032,10 @@ private synchronized String getNode() throws ElasticsearchClientException { if (currentNode >= hosts.size()) { currentNode = 0; } - logger.debug("More than one node is available so we pick node number {} from {}.", currentNode, hosts); - return hosts.get(currentNode); + + String node = hosts.get(currentNode); + logger.debug("More than one node is available so we pick node number {} from {}: {}.", currentNode, hosts, node); + return node; } // We have only one node. We just return it. 
diff --git a/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/IElasticsearchClient.java b/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/IElasticsearchClient.java index 48fbc0a64..92e635d74 100644 --- a/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/IElasticsearchClient.java +++ b/elasticsearch-client/src/main/java/fr/pilato/elasticsearch/crawler/fs/client/IElasticsearchClient.java @@ -52,6 +52,11 @@ public interface IElasticsearchClient extends Closeable { */ String getVersion() throws ElasticsearchClientException; + /** + * Get license about the cluster it's connected to + */ + String getLicense() throws ElasticsearchClientException; + /** * Get the major version about the node it's connected to */ @@ -215,4 +220,10 @@ public interface IElasticsearchClient extends Closeable { * @return the generated API key BASE64 encoded of key:value */ String generateApiKey(String keyName) throws ElasticsearchClientException; + + /** + * Check if the client supports semantic search + * @return true if semantic is supported + */ + boolean isSemanticSupported(); } diff --git a/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_component_templates/fscrawler_mapping_content_semantic.json b/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_component_templates/fscrawler_mapping_content_semantic.json new file mode 100644 index 000000000..c26883493 --- /dev/null +++ b/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_component_templates/fscrawler_mapping_content_semantic.json @@ -0,0 +1,15 @@ +{ + "template": { + "mappings": { + "properties": { + "content": { + "type": "text", + "copy_to": [ "content_semantic" ] + }, + "content_semantic": { + "type": "semantic_text" + } + } + } + } +} diff --git 
a/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_component_templates/fscrawler_mapping_content_vector.json b/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_component_templates/fscrawler_mapping_content_vector.json new file mode 100644 index 000000000..2177625b4 --- /dev/null +++ b/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_component_templates/fscrawler_mapping_content_vector.json @@ -0,0 +1,14 @@ +{ + "template": { + "mappings": { + "properties": { + "content": { + "type": "text" + }, + "content_vector": { + "type": "dense_vector" + } + } + } + } +} diff --git a/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_index_templates/fscrawler_docs_semantic.json b/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_index_templates/fscrawler_docs_semantic.json new file mode 100644 index 000000000..a8740631a --- /dev/null +++ b/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_index_templates/fscrawler_docs_semantic.json @@ -0,0 +1,20 @@ +{ + "index_patterns": [ + "INDEX_NAME" + ], + "priority": 500, + "composed_of": [ + "fscrawler_alias", + "fscrawler_settings_shards", + "fscrawler_settings_total_fields", + "fscrawler_mapping_attributes", + "fscrawler_mapping_file", + "fscrawler_mapping_path", + "fscrawler_mapping_attachment", + "fscrawler_mapping_content_semantic", + "fscrawler_mapping_meta" + ], + "_meta": { + "description": "FSCrawler template for documents with semantic support using inference endpoint" + } +} diff --git a/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_index_templates/fscrawler_docs_vector.json b/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_index_templates/fscrawler_docs_vector.json new file mode 100644 index 000000000..db6615529 --- /dev/null +++ 
b/elasticsearch-client/src/main/resources/fr/pilato/elasticsearch/crawler/fs/client/8/_index_templates/fscrawler_docs_vector.json @@ -0,0 +1,20 @@ +{ + "index_patterns": [ + "INDEX_NAME" + ], + "priority": 500, + "composed_of": [ + "fscrawler_alias", + "fscrawler_settings_shards", + "fscrawler_settings_total_fields", + "fscrawler_mapping_attributes", + "fscrawler_mapping_file", + "fscrawler_mapping_path", + "fscrawler_mapping_attachment", + "fscrawler_mapping_content_vector", + "fscrawler_mapping_meta" + ], + "_meta": { + "description": "FSCrawler template for documents with semantic support using basic vectorization" + } +} diff --git a/elasticsearch-client/src/test/java/fr/pilato/elasticsearch/crawler/fs/client/ElasticsearchClientIT.java b/elasticsearch-client/src/test/java/fr/pilato/elasticsearch/crawler/fs/client/ElasticsearchClientIT.java index f52ef352f..6a7dab98d 100644 --- a/elasticsearch-client/src/test/java/fr/pilato/elasticsearch/crawler/fs/client/ElasticsearchClientIT.java +++ b/elasticsearch-client/src/test/java/fr/pilato/elasticsearch/crawler/fs/client/ElasticsearchClientIT.java @@ -1,6 +1,7 @@ package fr.pilato.elasticsearch.crawler.fs.client; import com.carrotsearch.randomizedtesting.RandomizedTest; +import fr.pilato.elasticsearch.crawler.fs.beans.Doc; import fr.pilato.elasticsearch.crawler.fs.framework.bulk.FsCrawlerBulkResponse; import fr.pilato.elasticsearch.crawler.fs.settings.Elasticsearch; import fr.pilato.elasticsearch.crawler.fs.settings.FsSettings; @@ -8,8 +9,11 @@ import fr.pilato.elasticsearch.crawler.fs.test.framework.AbstractFSCrawlerTestCase; import jakarta.ws.rs.ClientErrorException; import jakarta.ws.rs.NotAuthorizedException; +import jakarta.ws.rs.NotFoundException; import jakarta.ws.rs.ProcessingException; import org.apache.commons.io.IOUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.junit.*; import org.testcontainers.containers.NginxContainer; import 
org.testcontainers.containers.wait.strategy.HttpWaitStrategy; @@ -38,14 +42,19 @@ import static org.hamcrest.Matchers.containsString; import static org.junit.Assert.fail; import static org.junit.Assume.assumeThat; +import static org.junit.Assume.assumeTrue; public class ElasticsearchClientIT extends AbstractFSCrawlerTestCase { + protected static final Logger staticLogger = LogManager.getLogger(ElasticsearchClientIT.class); private final static String DEFAULT_TEST_CLUSTER_URL = "https://127.0.0.1:9200"; private final static String DEFAULT_USERNAME = "elastic"; private final static String DEFAULT_PASSWORD = "changeme"; + private static final String DOC_INDEX_NAME = "fscrawler_elasticsearch_client_i_t"; + private static final String FOLDER_INDEX_NAME = DOC_INDEX_NAME + "_folder"; private static String testClusterUrl = null; private static final TestContainerHelper testContainerHelper = new TestContainerHelper(); + private final static boolean testCheckCertificate = getSystemProperty("tests.cluster.check_ssl", true); private static String testCaCertificate; private static IElasticsearchClient esClient; @@ -65,7 +74,15 @@ public static void startServices() throws IOException, ElasticsearchClientExcept } } - esClient = startClient(); + boolean checkCertificate = testCheckCertificate; + esClient = startClient(checkCertificate); + if (esClient == null && checkCertificate) { + testClusterUrl = testClusterUrl.replace("http:", "https:"); + staticLogger.info("Trying without SSL verification on [{}].", testClusterUrl); + checkCertificate = false; + esClient = startClient(checkCertificate); + } + if (esClient == null) { staticLogger.info("Elasticsearch is not running on [{}]. 
We start TestContainer.", testClusterUrl); testClusterUrl = testContainerHelper.startElasticsearch(true); @@ -75,7 +92,8 @@ public static void startServices() throws IOException, ElasticsearchClientExcept Files.write(clusterCaCrtPath, testContainerHelper.getCertAsBytes()); testCaCertificate = clusterCaCrtPath.toAbsolutePath().toString(); } - esClient = startClient(); + checkCertificate = testCheckCertificate; + esClient = startClient(checkCertificate); } assumeThat("Integration tests are skipped because we have not been able to find an Elasticsearch cluster", @@ -85,14 +103,17 @@ public static void startServices() throws IOException, ElasticsearchClientExcept staticLogger.info("Starting integration tests against an external cluster running elasticsearch [{}]", version); } - private static ElasticsearchClient startClient() throws IOException, ElasticsearchClientException { - staticLogger.info("Starting a client against [{}] with [{}] as a CA certificate", testClusterUrl, testCaCertificate); + private static ElasticsearchClient startClient(boolean sslVerification) throws ElasticsearchClientException { + staticLogger.info("Starting a client against [{}] with [{}] as a CA certificate and ssl check [{}]", + testClusterUrl, testCaCertificate, sslVerification); // We build the elasticsearch Client based on the parameters Elasticsearch elasticsearchConfiguration = Elasticsearch.builder() .setNodes(Collections.singletonList(new ServerUrl(testClusterUrl))) - .setSslVerification(true) + .setSslVerification(sslVerification) .setCaCertificate(testCaCertificate) .setCredentials(null, DEFAULT_USERNAME, DEFAULT_PASSWORD) + .setIndex(DOC_INDEX_NAME) + .setIndexFolder(FOLDER_INDEX_NAME) .build(); FsSettings fsSettings = FsSettings.builder("esClient").setElasticsearch(elasticsearchConfiguration).build(); @@ -108,7 +129,7 @@ private static ElasticsearchClient startClient() throws IOException, Elasticsear && testClusterUrl.toLowerCase().startsWith("https")) { staticLogger.info("May be 
we are trying to run against a <8.x cluster. So let's fallback to http."); testClusterUrl = testClusterUrl.replace("https", "http"); - return startClient(); + return startClient(sslVerification); } } return null; @@ -514,7 +535,7 @@ public void testBulk() throws ElasticsearchClientException { esClient.refresh(getCrawlerName()); ESSearchResponse response = esClient.search(new ESSearchRequest().withIndex(getCrawlerName())); - assertThat(response.getTotalHits(), is(nbItems-nbItemsToDelete)); + assertThat(response.getTotalHits(), is(nbItems - nbItemsToDelete)); } { esClient.deleteIndex(getCrawlerName()); @@ -657,30 +678,37 @@ public void testWithTwoRunningNodes() throws ElasticsearchClientException, IOExc // Build a client with 2 running nodes (well, the same one is used twice) and one non-running node Elasticsearch elasticsearch = Elasticsearch.builder() .setNodes(List.of( + new ServerUrl(testClusterUrl), new ServerUrl(testClusterUrl), new ServerUrl("http://127.0.0.1:9206"), new ServerUrl(testClusterUrl))) .setCredentials(null, DEFAULT_USERNAME, DEFAULT_PASSWORD) .setSslVerification(false) + .setIndex(DOC_INDEX_NAME) + .setIndexFolder(FOLDER_INDEX_NAME) .build(); FsSettings fsSettings = FsSettings.builder("esClient").setElasticsearch(elasticsearch).build(); try (IElasticsearchClient localClient = new ElasticsearchClient(null, fsSettings)) { localClient.start(); - assertThat(localClient.getAvailableNodes(), hasSize(3)); + assertThat(localClient.getAvailableNodes(), hasSize(4)); localClient.isExistingIndex("foo"); - assertThat(localClient.getAvailableNodes(), hasSize(2)); + assertThat(localClient.getAvailableNodes(), hasSize(3)); - for (int i = 0; i < CHECK_NODES_EVERY - 3; i++) { + for (int i = 0; i < CHECK_NODES_EVERY - 4; i++) { localClient.isExistingIndex("foo"); - assertThat("Run " + i, localClient.getAvailableNodes(), hasSize(2)); + assertThat("Run " + i, localClient.getAvailableNodes(), hasSize(3)); } for (int i = 0; i < 10; i++) { + 
localClient.isExistingIndex("foo"); + assertThat(localClient.getAvailableNodes(), hasSize(4)); + localClient.isExistingIndex("foo"); + assertThat("Run " + i, localClient.getAvailableNodes(), hasSize(4)); localClient.isExistingIndex("foo"); assertThat("Run " + i, localClient.getAvailableNodes(), hasSize(3)); - for (int j = 0; j < CHECK_NODES_EVERY - 2; j++) { + for (int j = 0; j < CHECK_NODES_EVERY - 4; j++) { localClient.isExistingIndex("foo"); - assertThat("Run " + i + "-" + j, localClient.getAvailableNodes(), hasSize(2)); + assertThat("Run " + i + "-" + j, localClient.getAvailableNodes(), hasSize(3)); } } } @@ -778,7 +806,8 @@ public void testWithHttpService() throws IOException, ElasticsearchClientExcepti URL url = container.getBaseUrl("http", 80); logger.debug("Nginx started on {}.", url); - InputStream inputStream = url.openStream();; + InputStream inputStream = url.openStream(); + ; String text = IOUtils.toString(inputStream, StandardCharsets.UTF_8); assertThat(text, containsString("Hello World!")); } @@ -787,6 +816,86 @@ public void testWithHttpService() throws IOException, ElasticsearchClientExcepti assertThat(esClient.getVersion(), not(isEmptyOrNullString())); } + @Test + public void license() throws ElasticsearchClientException { + String license = esClient.getLicense(); + assertThat(license, not(isEmptyOrNullString())); + } + + @Test + public void testIndexFsCrawlerDocuments() throws Exception { + // Remove existing templates if any + removeIndexTemplates(); + removeComponentTemplates(); + + // We push the templates to the cluster + esClient.createIndexAndComponentTemplates(); + + // We remove the exising indices + esClient.deleteIndex(DOC_INDEX_NAME); + esClient.deleteIndex(FOLDER_INDEX_NAME); + + // We create a document + esClient.index(DOC_INDEX_NAME, "BackToTheFuture", new Doc("Marty! Let's go back to the future!"), null); + esClient.index(DOC_INDEX_NAME, "StarWars", new Doc("Luke. Obiwan never told you what happened to your father. 
I'm your father!"), null); + esClient.index(DOC_INDEX_NAME, "TheLordOfTheRings", new Doc("You cannot pass! I am a servant of the Secret Fire, wielder of the Flame of Anor. The dark fire will not avail you, Flame of Udun! Go back to the shadow. You shall not pass!"), null); + + // We flush the bulk request + esClient.flush(); + + // We refresh the index + esClient.refresh(DOC_INDEX_NAME); + + // We can run some queries to check that semantic search actually works as expected + assertThat(esClient.search(new ESSearchRequest() + .withIndex(DOC_INDEX_NAME) + .withESQuery(new ESMatchQuery("content", "father")) + ).getHits().get(0).getId(), is("StarWars")); + assertThat(esClient.search(new ESSearchRequest() + .withIndex(DOC_INDEX_NAME) + .withESQuery(new ESMatchQuery("content", "future")) + ).getHits().get(0).getId(), is("BackToTheFuture")); + assertThat(esClient.search(new ESSearchRequest() + .withIndex(DOC_INDEX_NAME) + .withESQuery(new ESMatchQuery("content", "Flame")) + ).getHits().get(0).getId(), is("TheLordOfTheRings")); + + // We can only execute this test when semantic search is available + if (esClient.isSemanticSupported()) { + // We can run some queries to check that semantic search actually works as expected + assertThat(esClient.search(new ESSearchRequest() + .withIndex(DOC_INDEX_NAME) + .withESQuery(new ESSemanticQuery("content_semantic", "a movie from Georges Lucas")) + ).getHits().get(0).getId(), is("StarWars")); + assertThat(esClient.search(new ESSearchRequest() + .withIndex(DOC_INDEX_NAME) + .withESQuery(new ESSemanticQuery("content_semantic", "a movie with a delorean car")) + ).getHits().get(0).getId(), is("BackToTheFuture")); + assertThat(esClient.search(new ESSearchRequest() + .withIndex(DOC_INDEX_NAME) + .withESQuery(new ESSemanticQuery("content_semantic", "Frodo and Gollum")) + ).getHits().get(0).getId(), is("TheLordOfTheRings")); + } + } + + private void removeComponentTemplates() { + logger.debug("Removing component templates"); + try { + 
esClient.performLowLevelRequest("DELETE", "/_component_template/fscrawler_*", null); + } catch (ElasticsearchClientException | NotFoundException e) { + // We ignore the error + } + } + + private void removeIndexTemplates() { + logger.debug("Removing index templates"); + try { + esClient.performLowLevelRequest("DELETE", "/_index_template/fscrawler_*", null); + } catch (ElasticsearchClientException | NotFoundException e) { + // We ignore the error + } + } + protected String getCrawlerName() { String testName = "fscrawler_".concat(getCurrentClassName()).concat("_").concat(getCurrentTestName()); return testName.contains(" ") ? split(testName, " ")[0] : testName; diff --git a/elasticsearch-client/src/test/java/fr/pilato/elasticsearch/crawler/fs/client/TestContainerHelper.java b/elasticsearch-client/src/test/java/fr/pilato/elasticsearch/crawler/fs/client/TestContainerHelper.java index 110e1bd6d..97c3d491d 100644 --- a/elasticsearch-client/src/test/java/fr/pilato/elasticsearch/crawler/fs/client/TestContainerHelper.java +++ b/elasticsearch-client/src/test/java/fr/pilato/elasticsearch/crawler/fs/client/TestContainerHelper.java @@ -37,6 +37,7 @@ class TestContainerHelper { ElasticsearchContainer elasticsearch; private byte[] certAsBytes; + private static final long memoryInBytes = 4L * 1024L * 1024L * 1024L; /** * Start the container @@ -56,10 +57,11 @@ String startElasticsearch(boolean keepData) throws IOException { .withTag(version)) // As for 7.x clusters, there's no https, api keys are disabled by default. We force it. 
.withEnv("xpack.security.authc.api_key.enabled", "true") - // For 6.x clusters, we need to activate a trial + // For 6.x clusters and semantic search, we need to activate a trial .withEnv("xpack.license.self_generated.type", "trial") .withReuse(keepData) - .withPassword(password); + .withPassword(password) + .withCreateContainerCmdModifier(cmd -> cmd.getHostConfig().withMemory(memoryInBytes)); elasticsearch.start(); // Try to get the https certificate if exists diff --git a/framework/src/main/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtil.java b/framework/src/main/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtil.java index 56318c97a..9dc524491 100644 --- a/framework/src/main/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtil.java +++ b/framework/src/main/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtil.java @@ -599,7 +599,7 @@ public static int extractMajorVersion(String version) { return Integer.parseInt(version.split("\\.")[0]); } - public static String extractMinorVersion(String version) { - return version.split("\\.")[1]; + public static int extractMinorVersion(String version) { + return Integer.parseInt(version.split("\\.")[1]); } } diff --git a/framework/src/test/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtilTest.java b/framework/src/test/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtilTest.java index f60d79bdf..9baf80e31 100644 --- a/framework/src/test/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtilTest.java +++ b/framework/src/test/java/fr/pilato/elasticsearch/crawler/fs/framework/FsCrawlerUtilTest.java @@ -95,13 +95,15 @@ public void testIsFileSizeUnderLimit() { @Test public void testExtractMajorVersion() { assertThat(extractMajorVersion("7.2.0"), is(7)); + assertThat(extractMajorVersion("8.17.1"), is(8)); assertThat(extractMajorVersion("10.1.0"), is(10)); } @Test public void testExtractMinorVersion() { - assertThat(extractMinorVersion("7.2.0"), 
is("2")); - assertThat(extractMinorVersion("10.1.0"), is("1")); + assertThat(extractMinorVersion("7.2.0"), is(2)); + assertThat(extractMinorVersion("8.17.1"), is(17)); + assertThat(extractMinorVersion("10.1.0"), is(1)); } @Test diff --git a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/AbstractITCase.java b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/AbstractITCase.java index 104865df2..8bf8e77c1 100644 --- a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/AbstractITCase.java +++ b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/AbstractITCase.java @@ -38,6 +38,7 @@ import org.apache.commons.io.IOUtils; import org.apache.logging.log4j.Level; import org.hamcrest.Matcher; +import org.jetbrains.annotations.NotNull; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; @@ -71,19 +72,16 @@ import static org.junit.Assume.assumeThat; /** - * Integration tests expect to have an elasticsearch instance running on https://127.0.0.1:9200. + * Integration tests expect to have an elasticsearch instance running on https://127.0.0.1:9200. * Otherwise, a TestContainer instance will be started. - * + *
* Note that all existing data in this cluster might be removed - * + *
* If you want to run tests against a remote cluster, please launch tests using * tests.cluster.url property: - * - * mvn verify -Dtests.cluster.url=https://127.0.0.1:9200 - * + *
mvn verify -Dtests.cluster.url=https://127.0.0.1:9200
* All integration tests might be skipped using: - * - * mvn verify -DskipIntegTests + *
mvn verify -DskipIntegTests
*/ public abstract class AbstractITCase extends AbstractFSCrawlerTestCase { @@ -274,7 +272,15 @@ public static void startServices() throws IOException, ElasticsearchClientExcept } } - FsSettings fsSettings = startClient(); + boolean checkCertificate = testCheckCertificate; + FsSettings fsSettings = startClient(checkCertificate); + if (fsSettings == null && checkCertificate) { + testClusterUrl = testClusterUrl.replace("http:", "https:"); + staticLogger.info("Trying without SSL verification on [{}].", testClusterUrl); + checkCertificate = false; + fsSettings = startClient(checkCertificate); + } + if (fsSettings == null) { staticLogger.info("Elasticsearch is not running on [{}]. We start TestContainer.", testClusterUrl); testClusterUrl = testContainerHelper.startElasticsearch(testKeepData); @@ -284,7 +290,8 @@ public static void startServices() throws IOException, ElasticsearchClientExcept Files.write(clusterCaCrtPath, testContainerHelper.getCertAsBytes()); testCaCertificate = clusterCaCrtPath.toAbsolutePath().toString(); } - fsSettings = startClient(); + checkCertificate = testCheckCertificate; + fsSettings = startClient(checkCertificate); } assumeThat("Integration tests are skipped because we have not been able to find an Elasticsearch cluster", @@ -304,7 +311,7 @@ public static void startServices() throws IOException, ElasticsearchClientExcept managementService.close(); // Start the documentService with the Api Key - fsSettings = startClient(); + fsSettings = startClient(checkCertificate); // Start the managementService with the Api Key managementService = new FsCrawlerManagementServiceElasticsearchImpl(metadataDir, fsSettings); @@ -315,12 +322,13 @@ public static void startServices() throws IOException, ElasticsearchClientExcept staticLogger.info("Starting integration tests against an external cluster running elasticsearch [{}]", version); } - private static FsSettings startClient() throws IOException, ElasticsearchClientException { - staticLogger.info("Starting a 
client against [{}] with [{}] as a CA certificate", testClusterUrl, testCaCertificate); + private static FsSettings startClient(boolean sslVerification) throws IOException, ElasticsearchClientException { + staticLogger.info("Starting a client against [{}] with [{}] as a CA certificate and ssl check [{}]", + testClusterUrl, testCaCertificate, sslVerification); // We build the elasticsearch Client based on the parameters elasticsearchConfiguration = Elasticsearch.builder() .setNodes(Collections.singletonList(new ServerUrl(testClusterUrl))) - .setSslVerification(true) + .setSslVerification(sslVerification) .setCaCertificate(testCaCertificate) .setCredentials(testApiKey, testClusterUser, testClusterPass) .build(); @@ -337,7 +345,7 @@ private static FsSettings startClient() throws IOException, ElasticsearchClientE && testClusterUrl.toLowerCase().startsWith("https")) { staticLogger.info("May be we are trying to run against a <8.x cluster. So let's fallback to http."); testClusterUrl = testClusterUrl.replace("https", "http"); - return startClient(); + return startClient(sslVerification); } } return null; @@ -548,13 +556,13 @@ public static String[] split(String toSplit, String delimiter) { public static void deleteRecursively(Path root) throws IOException { Files.walkFileTree(root, new SimpleFileVisitor<>() { @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + public @NotNull FileVisitResult visitFile(Path file, @NotNull BasicFileAttributes attrs) throws IOException { Files.delete(file); return FileVisitResult.CONTINUE; } @Override - public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + public @NotNull FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { Files.delete(dir); return FileVisitResult.CONTINUE; } diff --git a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/TestContainerHelper.java 
b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/TestContainerHelper.java index d88ca38dc..b4334f06f 100644 --- a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/TestContainerHelper.java +++ b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/TestContainerHelper.java @@ -58,7 +58,7 @@ String startElasticsearch(boolean keepData) throws IOException { .withTag(version)) // As for 7.x clusters, there's no https, api keys are disabled by default. We force it. .withEnv("xpack.security.authc.api_key.enabled", "true") - // For 6.x clusters, we need to activate a trial + // For 6.x clusters and for semantic search, we need to activate a trial .withEnv("xpack.license.self_generated.type", "trial") .withReuse(keepData) .withPassword(password); diff --git a/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/elasticsearch/FsCrawlerTestSemanticIT.java b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/elasticsearch/FsCrawlerTestSemanticIT.java new file mode 100644 index 000000000..9c099c63f --- /dev/null +++ b/integration-tests/src/test/java/fr/pilato/elasticsearch/crawler/fs/test/integration/elasticsearch/FsCrawlerTestSemanticIT.java @@ -0,0 +1,62 @@ +/* + * Licensed to David Pilato (the "Author") under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Author licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package fr.pilato.elasticsearch.crawler.fs.test.integration.elasticsearch; + +import com.jayway.jsonpath.DocumentContext; +import fr.pilato.elasticsearch.crawler.fs.client.*; +import fr.pilato.elasticsearch.crawler.fs.test.integration.AbstractFsCrawlerITCase; +import org.junit.Test; + +import static fr.pilato.elasticsearch.crawler.fs.framework.JsonUtil.parseJsonAsDocumentContext; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.is; +import static org.junit.Assume.assumeTrue; + +/** + * Test if semantic search is working as we could expect, ie: not activated when it can't or + * activated when it should. 
+ */ +public class FsCrawlerTestSemanticIT extends AbstractFsCrawlerITCase { + + /** + * Test for #1996: https://github.com/dadoonet/fscrawler/pull/1996 + */ + @Test + public void test_semantic() throws Exception { + // We will execute this test from version 8.17 with a trial or enterprise license + assumeTrue("We don't run this test when semantic search is not available", + managementService.getClient().isSemanticSupported()); + + crawler = startCrawler(); + + // We expect to have 3 files + countTestHelper(new ESSearchRequest().withIndex(getCrawlerName()), 3L, null); + + // 2 pdf and 1 txt + countTestHelper(new ESSearchRequest().withIndex(getCrawlerName()).withESQuery(new ESTermQuery("file.extension", "pdf")), 2L, null); + countTestHelper(new ESSearchRequest().withIndex(getCrawlerName()).withESQuery(new ESTermQuery("file.extension", "txt")), 1L, null); + + // We should have semantic information + ESSearchResponse response = countTestHelper(new ESSearchRequest().withIndex(getCrawlerName()).withESQuery(new ESSemanticQuery("content_semantic", "Someone understanding loans and finances")), 3L, null); + DocumentContext document = parseJsonAsDocumentContext(response.getHits().get(0).getSource()); + assertThat(document.read("$.file.filename"), is("3547447.pdf")); + assertThat(document.read("$.content_semantic.inference.model_settings.task_type"), is("sparse_embedding")); + } +} diff --git a/integration-tests/src/test/resources-binary/samples/test_semantic/3547447.pdf b/integration-tests/src/test/resources-binary/samples/test_semantic/3547447.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f7ef79986f97ac32d1083b97edba0c8e79aff380 GIT binary patch literal 21607 zcmd40b980RyEPiywv&!++fJurXUDc}+crD4la6h(W81jh?|XjdoIAcT?)UHAW9+q8 zRXxvBvudq9#;iHX6-CAAnCMwyn1~pOYz-}7czIzMzMDH)0f;C+zo=jsL>vGHPPPtz zTR^TrmOy4ePC(W`R)4OxKz2aJKqf#$K#V~2Kukc)f3B>5G87$bjh&4E|E4JY2_yny z{WHY*=gaY*g66h1q6SWX<`Cs(W@Kh!C+2Owe)b90hVar%=F!=NHW4!@4{)J>e}$w}h)fKu 
z9RGU6AT>o|m+1S|r8D?Vqw3=#T91Pt5R{Wn$`zKvO#enGV#w!0a_MesiDf~B7{Lec6i4tcgV*l?Tbp=BUfYCn# ze}evJ1OHjgUzA!z|Gi&-H{d_OWdSy(PG&@$|6&z4xB9cmL=56qf1aNxz{u7Z@VAzu z)1UXu8pbW_LPsVRw+*@bOzjx2*NAPE0k{vua$4384+M*VC{zr=A9Gb7nZfX!*jHfA z>1?oORj)2Ytf^5K)KVdKfh=;+!$;NYD|Pxy`|I}Y^<#MYOUD=c>+{*Y$^AHE@_O6j z>uB=@bXg$kOZO?6@T=?d*7?Eu!}`Je!Tjm#V|)8YXbZyC^T)Vt<m)9JnB zzFv2%n6nmwE5Ah8(ioelom!O&g>!P9sPzys*kSZzYJD3{<6TO(BoUZ4 zL|ap4wDPRcGOOPzgxdy{S`~7b2d~#wy9R|`{^yvttn@sfu#S6sYP~=rx-bCVOXe(C zUKzmxr;mxYtV$!s-Rs7ihfG;GDF|MXZ^|JdHk^+~E+5XKk6@FnYk}n;rXb-veWa+X zARUZ4Dtiv->~MZ|Oj=~rC){gls(Z8gj@B#I0vK>#DfxNx!wp(-MZW85r6p`@J& z*pNBO!ktT!A&tURLe+A>L*vKf7;iH>xXp+x4kH)1L-$7w*&u*{^MXdS>UHoBEz1$a zF;pxOO{+R>p5+>dQe*@n7c5n_56sF_JkS;dRm&8063!2KA1*A^R-Wt45EQVdf~-e7 z1U3QwhV%qsK(MhKj6^BQ+cus3vlsnF&fj4AflUuKS~L(otk6yXVFfq@9SSKf>!dTf z1HKbpf;#Dj@_rt=069dv>DpUSy^=R$d@yj&ix)%D>@sa>P$%>^R4$OFgE^SJybYZF zkAg?kr@hv;pvc0Z`0!#5YA>U1vpr~`@9P@IxQLqmAmG6MKl1lBlEb80Kao(MGVP{F zaj{G5Hi;?sAcJQFTDu)4{c5nOm`W4~+AFT;eF~Wt+%|w(LwKf;^P&3-e9cZDosmno zo$3~$F+@ncS$-1;Vut#g7J;%2fKRD0`F=-W#;SZ_)q8B7mA~fLid)E{E+y|i)*M?-CwR0ov$+o80V3^=l!t#!G>5tE zxTCUP8*~Cq!BR)X)si>#;<%l}BgRVtR97!)Y#8w+F?jKEA`U8b$}T@k+kK=R;{XXE z3APguRL(Eq9uyc&B6P+|U$6o_9(8O}aZ%|^qO_A%xFhu|jKt|A-!>l)i zOMeor>z9nC&INKIUm=DfUcn`gI@~vN@3>>sM?WuSREzxJ(I~LUip@s@UJ(`uGKH#N zdK6)`ch}n7YYryPXrChuCQPoL79TK*q>|`3>^XM$2WU6ZZXDpqKHXi=p5&AMQ&}+#)>sU`C02k6gcS=IfqYxSzj2xDr7 zQAGoydjk{4!UVc`;&FmC#XR2K(GEAsVQD*BbLD z#sf#K+VN=i-69N!5RTE>*|)QCOf(HaPk)**331gcb#seYgVydndBiypxkcSg^CR~w zdr|Jb3+A`=yNSH;si_pRXHVm%r%SW^@H)kbzE*|id^<&f;-ZeM7*-1DZljjJ?l!2bUh}gYPlf>w<+ld>>K5B3 z`0q@k9c`^_#4s2_*#O16=eTcRSBx!A-Us7zvuc>Q-^52X)TDh!$PokDv)D0_wBKqY3 z@v}P(_&$F(7$qJC#b~^N$K|Div|$XCQUyr-sGIvdj{niDqf*Y9_E69_U1c zh=XHbj#yrYz`yPo3u0GAAYkvv;->9wjH$gdWRzqozW4^jRoRUPbjvqLP3tCM|K8A9 ztV($HT#m%Y3))ar2;_;U#SM+yQz?8z+&SMW&)7>qRhLh9Ik zMgsN<1`FFvDCjTZEA3DSyo=$9ZB0^3Y9b765#Si0B2We<1qMX5Mq;#Uo+rfTUtoqf zw|b;^Q#UOn*H!fDl4G)S^4YB$rX|X3fS2zL&>n6#tsNk?v~)B+T6*!}-%jXMURDp} 
z7{Tbu>+juuv^H)qHktVq)r}Rw!{@yfR+BFfB(_2I>^LasgAb@(!}!B3CHhUyuj^)v zY}3-SN<*A_Lm|z57lkF^$O_>lfF{uP_|}eR{W_cYJ?w=fpB7v7<^Wq`&YK8R_B-)O*W1jc9kgVsmoD?^=Yn{D};2Y=mq%ED5 zc1*SPMCzg~tnJ0pw}@>4_LX7C`+5l@Yo2-`SfICsspMkmi#6Er?*!*6Z?A?BEg+BP zEs*LX`C@u=8kktwap96r%BcC8B5u7dk zH6=MEQtJ}_XHqiFae{cpB(0Ogd@0|cQ2a8}w%)16@jW9(&_c;?+{$(wxN>ZewSOpg z%z$-?D5lqSS{LrXNfms!#3g8xDb4m?@%9>&v+nYxgHgeVAso3lOQfV(A!JP^tFHzr zr8>a=1ObqQu{cE0comC&1>4|GLmmMQQQAzP;_cq^M*5Y~-NHm{Pzd~nX&hY?z75b? zt5Kn%qx9GnV$LBO^J`nyKJN>T6q7*2%~H(ql$|5Z0V&c~j}Au84ELdZ-^;qf-cA)W z@#6BQwJF97i*lkOZH=enDz%!d`GguCw@(}Z0~8T~{|4mT`ng{0F$@OJ zVg+e8&uhh`TuRds$#-Ct3Ivw49{^2FGMZp~x4;f;d)ytmHX2Sei11vWZ~Ag(j{cxp z9A6}XDZ5vO+^FZ~g~Z-l>y?S1&kn$8blPBLq`WN*SVzlo@?fdOLy=WFE^ZUz;=>q5 ztO}3ShKT$bE_eqAFL1hX%c@j$@}fKGe42HdfyRVr=kK=DMeEjJHgml(GZBBNXna1$ zV5husSP-6W40VXnH*2fY2pbAi>b&_$<|x4#xQ1578=T)q-Y(pjQogFRQCw6u^c^kL zo-)2&)FEqd_#!z?(`1kb6uxB;5gs@6E z-7FZm#H#=dORIu*DL38UssSWLYSbjjZ37dS11&MR5BC}NLUfn?EPg{6`nrvzM04bv z%h#k_1%fhC-e!Bn6x^{Q`)CFCk9Xy^5n41hXf#HWX-+V037$st{iK^2(+=k9`P@Fh zKYmKO5ra>Ls|8=nC2k>jyOa$S=xng;XrM{r4Ir==gP=gMfHofzVH324G)bNzsGKwV z>+X|`?-{09l1Gn+UE?_?oMbyS{Ia_Z#iMBNA+y&+&m-Ecc_$bc#UL?vMB35yAKhdkURY#Ii5G zuQH?y=!JN4E_0oj1|T4e?Y%=&2uIro-w!L%8X}3^E-scNiY$Odi@CXx@ZDEZHywDy zW$yCMD?C5R`No>TG3TuaUmQt6uZ=h_$1PKeM(@eyHOC_d}F4SFlT~_c%;!sRP8jLMB zacg%>1K2YZpnpChd=^UuBdh|1Dywk1b}xnkm><7YhnS=uXuv&hgQT#uZA-`b0k6f# z?eJHTbZMZZ!Sxn9YkjlGm3_LZGO(qn$kBZe9gjf}YHe5B#fZGcaM@AOSx_Ns zA)Tfx>$J^I?*S{j{?L?;R)7 zdOJcO=RnW#NFFxkFs?WYAF@u$8xH{?9ly}UlY$8wfOjJ3abGIix?&Oa&&-(4g?o&PW=3)y?>5)SR+&!oBUemxlbG>e zElX#&ncdfyui1evOa;9MpCWqSbVI>dwhP)VRxzad@=_YAD(9TaGiCamf@ubEeYt`s zTqCYNt9mBo5to~|s?X+M=TE-`ib+BK>M8$q)c-Ga=dUdAS1Dj+WM}=4+QRf#Px+60 z@ckdjN5sI%z{=M2ui|3*M^E`zaQR;v#Q#ga`X}T6Q=nn`SNmZ2ABBeLuf+35OZksP z^RMo~@PCzP{>l3f*8l%%G=G=#7v=xbX#UxQ{~-S_k%sBN6s7;WN~2|oD}m;-T(fc{ ze4f6Vi6Ko~mncjKGZexf#E%x*97M6OpYPjK|0AXIPU9_`YXZA^gI!U}$oy!cO-~|K zVC(b8$4zy6XZuG-`}Rj?yG~nVxASk`#os*~)0q9QhEJYNj}Mnmr=R1e`5x+l5bprIY=Jeb1x76Sdu?jMJ%5*(0J 
zQ6@VjXIMzo15iAE7KS7Sz^$(t*-JVS%y@>qr$|QFTYG*;&lrjf-skQ+TAia)*vScX z?PjMxs_0L-hk|=X5J?Ks!KV!1C@Ba-^QXg$_qRdlZ*3AhBI^1$3F- z7ySjSK`;@o1lo#V{C&vTCgw2OEMlc?bk{c0g0j_mgRnZHIow02{Gb?`o`t8N=`-?pcU^GE zKZc=LzA0^Tij$ZV8AAh>Q-)+XWYOp2m7~a^yamAf1~GKvshLc7s{!Y^r$t8#u_7Yy znW6%U$&d-G9 z3ReB>jZSouSWp4p)X(m>rthp=r^dW>@K=74+0^_(z%-GF;^SyIIq@NoROo!D=<(oA zHo8RXz_*CTJK>k8s}bz^HZJIyzwTv8GE|9pO!0GS_EYlXcpZ#{S9KFHWz|4I*2CLW zbNUWfm!0kg?)K$SY1;7kjbgO#l;`_5V4%Rz(V7eBa+co4+`z}FNHC~=S)P+In80qJ zej+6S=mFSd$+1{wRq`=DkajzMSG@wegP50dMGZd>pYzV^C-epHh$tz7xzIurmgqOS zAEaiw)3$I)k1e?|(r^enV|fTo$Pm{Hz)OL=A>PM3`rX131~i3iBHm=}IK}WeB_zm` z<@`e@y29{2a|@z15)Y)FY{9?{2}F{mnibj6?lG>*eycji(=7q-o9_OEfq#TOX$MbQ zZ>G3V{8}oyG=Gk6mA>*q-{H{}T#*>XQijSKp#uuN#ZYyFRL^B*Vc|-uHZW~$!n?ms zSExGhmqAwHl9)!%?Lh%!DYdw!Wignpj!{DJ$2IiV*sUw?-#Rk$*la4AeBeUIX2vXn zDi4kN?HFNbG-~kF_Iv$%Ch&sO^tl(>m&bO;&TCj`eAxGC)BEh|>8@POp%W{yubMoP zB!W#-^Oe}>rUDo89JX33wdzc&#dBQaGpgJmQu$_bC^;TJ)~)6~9)meP_^B@}J^dS$ zMhT2<5hj2q1~*mJa_|ceod1cC?s47=xDW*x!_3YWbJw`&Oh>X2EpHbE=`s4j4cM!h z#@hOX3jL&F#<&)y{R;_C&-zU2kj%w5o}9e9j_Ob6z(2sfgjJS6kae}}>xPr`ch0_Ws=&~f5H04<~n6~(M$d71@2 z4f`LWDK2cFSH*87vpD1a=36K!h%pq) z+w;fWvh%o#C8?&sN?6ps8H1hJ=&6l!6>33<)9SlCL0sI}4;z-7Mz(aq%!+9-yrBTU zd?rQdJ9k*As~V_IA7pFQxR0SoCz111=h1%H+6^#l72c*LBd>j~QQ#Uc+6@mnQbkVO zhEYD0R>>@58de2L$oYN$8BN>Q?*M)ydidRsMb~gy^a5rNAcWjCo%Q5U=tX0U+W*4` zW6P|6RrXp(8-`(Xe+u#>tf$Ghsps90E5T1~|K`3$N>_K*+MFrv7oT3A&c}NrFIGca zFBC6A&di8?A)-~KnTtUDz$B<;1QQ+;INh|6+OhI?<`FuMA(31>C7e>5 z82J(-w-y(Kx8I{J_e^tm+su?uH*MENrg7kxJk!@%l#8v}2cxl-X&${uiUy5l2>ajZ ze6ShGpn?RO74G?pGomn`THKHmrc?TMSiT+=&14R(g|Zmmb+HDGoe&yxSC_5cMb7&c{CCE{wsL zjALSOrpnkt>+>wHxtf^myY@XIQu1OS6f{Q0nGB2`BV~=Sd1Nj(CB?g;Uf}ZX>9xGo zBDSAebdArC_mCJ*8kc0%u@2Dfb83QaBkqd^qQJKPII-%g-Fcto&4B)ylESc6;Cff% zkv2$*(u)tpD3hIeYVyDUR&}ZIe-hz;sMlXQ#>~RO^xsVQe~dIDhJTsxUvo|r;ArGv zZuiHR{nxCMH?aO=iz+D#{rS**H@5~j$^%@LZLJM#lmVvBRtA5=tPD&Y{}_h__Auk0QbLYKf9+3pW;S}Re}iED_>lj0(lK$d(sMGiGBa`hi4-xglLVNX znmPTGr{V;#{z1gbLeIs@#=^wR_Ft&}uucH;pV|Lm;{S`>|L2VG*Z2IFs4@{Tv-|=4 
zC-1+AEA!t2z#sble5qKvnw@5Kk2>9I_2iayog*Fi>$lXymR8?lep}Vk ztxH>t#vcSBrJxCGxhUt+HChGNqlJKR2Md=br95JD>y*fjC!I8klrk2>eaNKde*nL| zeoeKh)mwpFY#+URnI5`_k)Bm;zjQxoO}S1n1ye?h70RoyiowUKe#cGpj1?JozUJcX zadWtQx!>-DInYx{YH=pU$d2Zle_}v8fECo*1YsRPG|LHjrCpH0NSVVlk17AI_{Awk zY{JBa%J;Fp7)m+zMRf0LNaZoh%qEJTUnU5jZF z?wIBRZEa=6Z?#1d;@To92gSf(&^2t@uaP;zaIvZ#&4)Om+vtLouK|MZO4ktcDyX>J zFe1SvGPkrsA5Zx#DnwCodMTvh?Cf-rji6=tq5^NqP3&FkrZNfUF03LNqDhkCfs@asx~zK&>f9{S4_9AQXWu^Z z@@=xs@p$9KwKen$BL9fc{iQoyX7=P{n$N-NB?0@UQqLpcjqm9-HCNd8jgwk@s`T+y^HG49&|iAi^-ptM6aigkR{w}O50QG zZG6_%4VJl7%9OVR=^K$A9%`145t^bUF7g>~FpLylmW44o}+W zHyB|mpgbHFLWl(2zz;3o6TS5>;&TZj`+Uxfx4r7c1^wq7}yR@|RkTkD^zz;7= z+r&8S?g~=i<6~LUH+LI7PYrA)cWcH!s1PX^R8)AkN%XOi{(k1 z5v+cW_O$w~20Nqu$VkCuVR^8EO$>&M&L_F!d??%g{$RS>>9V9DTSv0mrQqblMJpaA z3ByUwQH?{Lw#IAs=xM{_a)yzbl6H}}=!dFbNHQ9^J@C{C zzi(z(`TCQi(6#6iHvphR`k>s?c?>~b-9A%Wrc8wiD^Gf&Eqv=ojb~9PDXLiSgvHz1 zi8<7C=nb^h%RIcOJrcGK7E$TA_wx|1=0On|gOtGw0U7F61}j5$G>!8C-MuRtopsPK z4|z0kB$pL&{hAKW<(__Tpks-r3;Bc-Y7n6CljIUhE-xPho(90owV*z)o&CiuSmQFK zztrqqO3tBLJm<8l7VTs@STh2Ii@v|$#Mo~eJxGUIM>JPy%BWu=&L*H#gX}>@cgc5x z;qjE?RN%G_J^mbk*a!K9qXG8*uhW83yl`8K0G6fbF00WMNMtFu(rsE{`mdM zvV-hlOn-t9vCwk(ooj!=FeV)2dMJL$LH}NLQ&ceH|wZwD?d8k0h)8zdiW;C^M>Kc!@W@q zMI4VMd{D#g1K<3{s|rCP+&hqEjZ-4B30_&zM_C*tS-L&92qU4qj2t!9{1Wk55B=M2=ng{nOPHgxDNIVa*Eu4%U<*W{Q<{ zVTpjx;X&-}k=*J{LFXOmsXs84gq7W6J{mFiMY4RYiV02lp65NePN}i*!AQ;b?XFb5 zDO4n1!SeEqQ1mz%QrCNrC3lM@X3g0`g$UJ|8>m{j4C};ND{X5s6K$I4s&x-V!poYm zK`p#?F<(ON1(NEQRS;JbX`+t zYOpx;G~h8@s!%E4FVF1OZOBEA!L^yRp{-;;hKGkcn;tE46I(9mK7>S-Uy#GSzJ_DR z(kg}(tRv>4rsDGWyu*b(YBMJxIv|QT9%n{vwt%B67J4OBrWyRydFqk$=3OLQS4yUzk>?v`j4C$8F0I z=KG@|#Bg6kpJ8XzJhqL1)84gEDyl!uFT~n&gmpZ%`-O9D<8P;z8u%~RuPsJM#2U>B z*?PW(u3w*uBclat8M8Xm`5Zz_oW39oMY*8-I!HZ2h*!CTSk544*!1qvQh;N$efmxG zxDL2F98@ri`*ZJ5gy8kriHYr$P>T%e4e}*Woopy{o=nYG8V@m^)@d6Vt*rB(Ix@N3 zc2H7)ix)%aHEbzFSLX(KNlhg3wP$i~o=sU*sCQ&v#xdzE41dt8)zvwgcYwaG zt<4>)CrZ^Mk-B^gpCq6pw!wC-7_mp%-GaSPkiU@ei4W!}OW>gwCcUsjA=7q9KDbhc0B(;;O_ 
zL(tGva*GwbgG9Dv^Q-`vFeC5*e2eHGfZ$+pZ%mG5GuDvun#g_KOJ4f6@l&aK7kPdLrJuGQP zknIJ46jvy!2ym#{)a6z3bs<2wZ{%M_ntUHREE%9X;5j`FNmD`Z+cQ*%p4Ml@@dk~v zPhHF8#k8-%Z&AOyE52<%twc7&Q_DZ}5J?wM+}wrT`OrYj6|=a;%}!8y$Zy8Iz+jXIVf%%9iRF#a3C1i@jillvUY(+Vy^*N_ z)Q>DleNLZ_)bdN}0Eancb4Arm)(x$Uo`}jVJUyapQubc=>T#Z89 z;k+zY5HzVqtEP{|u?7B>lMqG)dobJ`)VE;TUsTJ%QwJ)TL75g*t3vc9wyf=QOvLTE zdJ&)f5y*V))K&cbrn#lFz1~EP!8n4iD^ayid3FGip3djNlu(W|vN&bS6JM)}@_T*V z16V0EPSo2YpTkkkCI!5@C4SWx>5ixvXZhBK|8=@(hg^BvRQP`Jk7u>h5WPe6bBjss zda;`6_6(VuW>H$W+I9m*j?QjIKlTkXGF+a=I&tvZ>nqsz=i3H9Hf-0I^lNKJ>>m>%UB=D#3g|X-%9=n9=HKde@HM^rXa; z64_-f4RJ>8KdesgkI10>w+7qDbhvpcWL zSq_WBsMSX&^yD+(p z?OS)laC=Af%ZMLuYD*|GNa*VBQ}963BB`U!rU$uOF|fpk`DKS$`YQ?moMGVDC}cyc z5L4XtP>>QVgTZjo0?$+1N)?t&?84x(0}x~9nX5@JlEE9<#M_Hjn6b7A87d)&sDoQG z3pK;2LN2J2kqDoVmku3MOp6F2_&_(i(5=#;soBZC+5Mrm?XgcO96mU$*I~<``9;A^ zr0X-*mE%0* z9ZL;coK;hj3j0AS+aht9evYdxHLECELgCLz!6@B$8G>G_dK8FhfirV0mkbZzlEo=&9w z_doyY_SUqLNi!gT@oml96jyfOgVg0ugUa}G2%IJrWTC=wwtQ6^eQnct5sN`Ih5Q-1 zy!eSv#nzjQqNM5Yre6r7Cb>qbex4?G-{42Y<~xfq3d*IEr5~P+&|gt>@Y|aQDump9 zpUT7we+nhUqg*Z*Z+~Scz9lXimxFqu(%rt(HDDd1;^!%t4)}1eUKFVSe#p<@rr36khzz*XUiQWAnX`@Ph8KAw-h5^+hvWw5aizR zq20V6=n}T+tR2w}cE2EZk&hwh!65((7u&a@Y%Klb72o-K_p5w_B^eDeIx29X7Sdj#Lx1j=WluzyS z{HjLKRR)V*L0#-g-N2}>s&n8Ij;k3z+%12<=2K`I^of+pW|P3^R9T7Rcvqq(c0@nT6Z?{WaK)+xu#C z*C23K#Z)Phe9-FS`-gmxi#Yx2x|V8q#${%j=<%;p*KVIvGBVRmdQQIf#wIow%cFg{ z7J5R<8wy59itCC>qNPisI`+(bnB`@yP8qvmoLJMdOd&pl*`S{eG^(=Q-Q|_|hNHh6*P4qbKHC!B6}b45zjc2FjV z{DU8y`5X~g;-t%G!-PBnCCn9UVFtf6@AhhR!!stI@4YdfRq$$mZx1!QCpB{qU*^Y< z=}N>zChukpD2Y?YO_|~&tSuuNb9(_0ycWlL(j-&$@i7nHBDZAKxr?0Ljeqv+Tr;kU2%ONOqugmY?vpqFxg!z=7D|%^DfWh7Md~bGIAKkv#(y&bQB`Bzj{Bgy~BdFi!M-3 zw|HJxclC5G03++=m_C8MpV_{=WkWQ6KJDWO)L!@q>@=gJMc4?3B5=crrH2Mh4Dz=r zOPF7_({Y@)kp?fXZu`?xcxAv_^D7x0{=(*KAaHOZJBj~31Ocf|M^f{%MY|%pTj%3bDfF&Wqu>!$*-so?g{SKdL`seBRQw%IQJ!;9d<6 zS6=CrjD1*ll*c1)uj>Z%Da$3Rr^7GhAntLZW&8H#XNG*|8lSgsGOL^E&zQKj>dbHY 
zPk2Zp?68%_^#BA_*w}5yhxsXxshJrw=jpEW&`E)@IIGYq+>r@R{-XN4R|S2!{oUAN}HIHf1_OPRwb>8V7!y5{*dRhLkpWS6{h zd)l}h^6HQ`h2welmPJvnU2u9*+u|hqoh4?}$o5=y<5%o`IkL)J0$gCE;6jmU*;S_J z3o=`Sv83&e4&MV0FmCRqTnF}u6KxBC4m-h!@6CHbavx+wGDnqlF1d!NJfUCkUYIkT zqbgU8n9woy$0JxF)DqqH&pUiI8MQ|8v8afj6e4We#*9>`Xwt*0PE<*BU6#fjHQU$QA+OX z{!;!XROJxd?vbwR>6VSP)!@$?TV36bw?g2rchk6o;Il)}nAgvatG!TNbH_u{DV9yX&)X8;C-ydVKi zb`gElKdAxU5|^_9*R-+rE-{5iwk*^Wy7I&er+Z?5xyD{46F zMUNHuq-E}YT#M>>7@76F@_t|Kpuf1fx&$BN&$IeXWoh|h-3S#1gPGV*ntgRz9((_& z?f5XB_7jyWs;V+y=$*xN`I9A<@6q|vndsaqlCbS+i-F&BnOv|H@@@EF!*%xI-sY43 zM+cp2zQE39^m@WjRL`^=UKj4F9lcS@Z~taj1`#6FCg&`O|UFV;U|0f*)4yV7rJil!2xJO|HiihoMR8P{j`+ z{>0K{mV?mcO0jLtYfae_5VHPRH-=wOm|tp8l0#$lzMAo<5%B4*C6YY&-c z47YB7G_SqL(Nl}A7$ytW?y0Tkk)M)R02ah9VxQ0EH)$JAXJ4mfb)$v`oHm(23jBR) z=JY~CW6o?|uhwSSkWcjSt#2OjYe3Wfa0)WEC0oka-bTq=#@ESx*b9o}9?0kve9=Ex z>NP*6Xj%pOWy6pfk#9{%38|3AuOfbWK=r1t#Oc*uVkA5CtXnprr=OsNABR8qiuVP| zm+C5w*jF9vDfxG{cHRlBIAbbTj#)wWF~1U}AjY?93n_{BEwCd(WcvCgC_!`KLd~+a zb7S&2&3GG~l0rUSDfrx|M4Fl$5q$j`FzXuGEwel(FgF(?o%HmIhBv`jT+HHfJn{)L zoJZX7UM<~`b$ny!ep@l32^Fh6+3KomYj2m_hS%d^E}Gwiu<8k zWR;^<+mr&sRb*EUbp)Mmw?>A@+{=;i613dn`(@2>wXLS={HBwCnSRnSEf%UMf_;qi`;6=C-WuYHNG{efqO4R`x<7 z9GcO8*H#o4#}jdB5jc1&LcjQGk7oVVRldsQiG+xMPS+7?e7@}_!kFN`I}oP;VXsp$ zcQScYPWP~DJh2VL{tg6s*oMD}9m^d($%GkH2vywrutGU3=ic4PqIH3fm${*`y1I*P zXSsf=`}5@Uv(I~&;%Ra-0?~prP~Zl=T-h@ge=v6R0{^L$zH7XK=72!mri|f^9Wm8C zzM|~C9knl_UDO)X!a9;;X=#SLbi$-(w?qaZxwY?&XR=gFGp^@2=$bAcJBRr##yZvsaR$OW~DB${Wb#_A`1TdL%|!p1LeoUN!8K2p9YX__dDg zwoo{kt1@+a%Bi2dLmB3bt5qirijwp7tYDYwJ*WlYOqM^>=FAe%Z;#!1MDJE#$S7aj zZdiX4Vz2QR_6n#&ZRqWXG>)`&1OVzp59}TE0nKivKO&8{KeI>|;j8HNCVXz{&OFtU zGObr?-!MJ8ih;T#5I;YbE6!ngzBdC-(W{>eRnpr|16;imu(*ds#w*+ddxhX~zHMaI zwPV+@rxQz;r~n;C7F>U~1sy9RIl;v~>-PP3{`Ahyg&8_Xk zmkqF%UCEAVuz!C;a8H87K2xwHQ$?%BjTWGsIUl7mrtFeoj3ENO8e>!aK1#Ewx$LPz zZSkwvfK4Pri&bF8q7i+K>f(dD_>#VlfaE(~J-pEP)yp!OM@WA+_?IZgoN!`1g7N?W zxfqQN3XrizbBa8+m&JtA`z)VNTbxz!EvP<eWNYe}=gmAdxWJA20Uw|ne+~I*Jj2*brU{(eeKrWLLv5evdrW*+I09l%> 
zVgXuE0J%`j8vxoQn~73kQOxnbWxg-2KV#Imq$x*A+E~@(Lnq=D_3vkn0_bm2Rt1_* zC`ts^@#HrHRkymLyJ?S(1=)w8a$#(z=5hsqgO5 z!#Z2pAsZEXH_*&Af|VM;xFFV8UXMX@7`g448H8%BDejDnMWZwW*qG)T_BlxHlCa0) z9?{MEp6N{4TB#r}6+kiV)7n5NqeRTZCk%+xb^zGHbuwU5Rom`6&hgh>o4OgF6ILec zUJwquLjATdL&l6Z%AQaNG#a{R{tQ*ya;Ll(a$}?mSc*7b4QF?n;V4mcV$&#KObim? z&4-`Vwr*D=a^sgIMhd+vV3IM`z6d^h0r*J+rcpb&K(dC6k$N0roP*!%#IA^c&j$=a zK=m|;4zF-mmWf{np28A^Ul^l5t5f{wTOY>D&!BkZm$FJAhRPyiA6b{>K7&l3h119; zzBdwm#M-q=iPFM&Cl{=hi*rs1hkM_^_LmO%O_|>%+t>+KI9G%?ok8JceD2#pR~n4n zXF8<^B@ zG6=c^v&8U-^*RL5v4Tk{Ljfs4Dj^c9dvEdsYlim0if74mG@m==8vtHIYVb&O8(+=? zdkz9Y_){oiy^>F;3{=KyQO-?VtDi%}Il0@nFj!8=QJ>UsV)YO)l1&q^6^ljSf^ zS;<-WVapu0;7kV9?uO&gLRY{fj?ycsFDrqhB1t>bXIw_ZojP=|9e6#$C}p>Ab|jWm zN){|%s|3@ZgvTV2ib*5?PbJp^4prX9iCiYh<;h)r9Zk)cB$u8>x!C&6q_PIA%+#VC$+f9!+ovJ9M z``6k()`k8VMQq#@{CI%&%c4$tgh%dZK%Jd^_XYRz%GNZ!Dn&!-v=Sv_&(`UTsnO!A zGsg?Ef2)77ZD^dz42)7uQI&2ydvn|2tR4x!HhIY`4@Ft<*?5Jj9@inU^!+Bb#G0E9 z>3kp_fyxJIfhhwnc|u*~;ksev4X<0q;2aSryv!k$&u+f&X{hTyS(&~3N%dr|#y+jI z!m_(1a_YX>q6zJOiPyv9BeBJcIj^qCbvZ;OUs!$Kp@zrU`u%KPTD-w}f?0Il{l&#Q z+6Us5*_&=A`|nB^vs7MjEoUsmG|fe1CjHV@WiZMhsWYTlt6`a>k?FOZhj#ZzWOlJR zPYm+S9#8Gp0X3E#KTnwI|j6Oup^d=O5P@JjxTTjt^mT-jR-E4Tm3W zYL|LfMeNxxRmFGCIHuK@ar4AL^p$i{tkh)r*KR^@r8STD3_OtU)u~|=_Aa?G$Vpz2 zl(oa^E;+<AX@jcw8s_>;SFH_Dt=0mO^Eosobf*h~WiKkhT_o#b1lN9WvhEcBzW- z>5o`z!4q~y1`7WD(4<3ce5qWerXGLK&aeaz7NV}EuX5Pe=uEPQaofIz^>1iNp(gzg z>ML(Q)nj+H%8KNi^zPkcw$DU|)Sge4*B6$Qjx|qgW{8wrcX+zBr5fK7)^fD7HdJV` z*B`y`l(Q~7eXXj6e%1A+)P~`&?i#lG@6pR#zdC0rOt99apFDKZJLxz(CB)PZih?V$ zxyz5fm{RBJSH!>vhDpL0--988C9@Kgl{br!&sk7Y#iu9#a{o|)oI=bIJ zXHk#Uo(*6kIx`gC#Hp<9Dxm{Cs&>WR7hl#kZ-q5&j@Ih169*p+CmOiupFDJ@A@lTw z0(<`WyRM8tUGIpqr`Hao?Bz!C6m?n6S)~GM>56aVLtR5^;F__Nwlfx2;{A8gGq+IO z((<1ZkG3{HpPpuEP73Yx?^pIupk zxZ3-RL!}}ex$%(+j8dgjo=yE@(!`Zct;EZBSgx(Xr|gNjoaO;9uSbmf_&V8;77bY& ziW<}QoCH!69V%&RW_KwIa$cV7cDGco=)`tS_md8}TZsAv<;;$srk&$m5=?A82tSq- z9$i%zHwm_vxUQljUJ3kDEH 
z!+U63UyHXHWl2Y;DfqvYOp$p3>7=b=;*5-u@u8*~iDEiWQT2bSwhjY;xfBwM;*vPXi}Q-!>ojQsYX$uN{xN0lF(sOo_>{P%Iz~0nX3Vf-fzo8 z1|l7*QZ}DE+Q?@7BWJa74BoFb#%amP)D{#Itt>7+>t5cLHYAr_*HU7hXWwXH-B(pr z5UV87-K*Wx+8x{`vWomA=a~lE#wPjoOjuY1Id^hJr%V{BED+4#~pki6;No-76@j%`;2W6=uRLnZ3@#HRG47^SuhChL$d^5ATxAm9uJ0 zRF1(anXQn^Wkhz4(Mvdd-`mF#gr%8Q<{-es7{fXTxX7 zvzKNb^u74kJLZTjHr3KckGzn@X(sAuMXP3>&?%M zHdKDZe_ivT%Zs;>vyPQ~ed}Z6akYuK-ZZ9xp7(pD)NdX6ksC6N@?MOr&ekraDd@O9 zIiaxot=5>oX~@F1ajfwfZ5t<(sWjwc-}ooysL%Vx0ZmpIOMn9^ZLQEwa z3H8NTp8)sQf9oE%w6${G8w$F%ZF$@vKn(=}rm- zI+)79PIOE`#~>zVgO2CfqBd9@G^TirO4MggkNRRd;E$~huFvIXLZyAY_PIZegzHfq zLZyFP7tL)h2ANLA@|yEQ?_~$!bJ8#vB@@GOozPr+yb197#Ij`!p0+J7Kmeg!ZEROx z0mKd9ark1kGKd9UEsp>|xSRk$_l3`}pX@-NV9?6{f9(Y~OV@?l0OI&^K_Kw5apVHF zg^cE2yAkmDWPza`KrX<8j3hE6~Nk#4|hz7!Qr$Gdk{$ z;H%T|Fz#Y*|FoEDn(D6HhaT7XjLK9!Zmql;~iUsCpiRTI9Gb(Tjkw|1ZK<&@d!88C4ny&*yieKtr zk_iR8g?V-~#{6}`*G~bzkmlJ@QE1jY9SvFNYeHJ!OGRi4bTl%`8SrI11O-^l(@`1l zf;DKQ1@}fmXeJAu3t~c@pEH1CEm)V#V8HWz$qWjBgMG|J!1n>DEI!5y>KYIQ_7d0y m*k1u+#MQQ-AOVCiHeg(mp#mSi0DJ8yttUdz)3bK6A^ZnnW9Fd% literal 0 HcmV?d00001 diff --git a/integration-tests/src/test/resources-binary/samples/test_semantic/83816738.pdf b/integration-tests/src/test/resources-binary/samples/test_semantic/83816738.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8ce320af024262976feafd85513dca68e7def912 GIT binary patch literal 29300 zcmd42b9kN4x4;`?$2J?=wr$&X(%5d)*hZtqNn^XQ8{4*%oA&$d`JMZm^W5|A-Ou~Z zo>`+c^J2}O&t9YoB4V_RbSwZyLIy&6BP#$84}f09(#6JeX zJRD33=|$aL#Fbq>>I3MN1qof8KL3u$5;D*+FahX|4G9_P7(Nsi{O4OxQkl@q(8l>s z8AE4lLV7uSJJbI(&+>2c+Vl#BE-t1{c7*gIrmmL8rb^;Mx_@O$mQrBL!h)2g2XF|L*rA`X8nLq|N^d z#GMR1ez*9aN&8b>T-lKD_rxmw$@@FB;~!ncENwm}nUG%0=HvQ_m>S!gnEr0d+2!Lt zvjwQIbNzni%lZC#xS#SGmbX6f zej4NV=I%orLooin7EP4(-u}G1kh1RD+5U2KHM99X*y-o>xOB8K!0-1sIO2AFt+&U_ z@Aq`y+xcdX;r8yz@$`H)@$T_(OutVSgYojj6ujA6^EgQW|GxjkaMJohByWg~uz&w_ z;eNTU_qL7D>W8~#{BF2%ZY;wX@~As|xjC0r6~RBSKyd!K>t0{4Ay zX@-0#dJ|DChVEG2$Q^4gTr;bdcLjWxylp?pIf4*+QXWUW_8=`D#5q9q4KPvP;yB3K 
z&U^Iu)A-22e85*g)*+RNMEE$X?Fl#ra~m)5aXosgg<8EedfWK?9-Lxr#=yjm?BU!} zpY7dwr+N>z(!SectE>lK-j>r$Gw0sOc%(c>(mmd~Wb#LC`$8G66rEj7cymalz#xcs zwiP}Q1yjZtbG%~rWo@>s_BMoVpE*6=W=Ym(#4PCX-dw|zP{c7}r-b8G+bnF>#h%;I2>ASH2|0H|( zYe2EH)R%@0aGglcwQBs}$nfg|HI-oml1lFB`WCvEr)UJ#lg8JD0=p$JKiPz1xTG!k zd{7bs1gwE91))X!!q~t>0B~r>c7r&|)PA%9I6*v6p&QPd*di|zY~m=KStW#j-0wm2 zOy&%(6n*Sgnya|#kBZQC!;+(F+PW_HSr{Kk>lyO^LdJm*nIOeQjQv$D&>XuDk{%N# zZZw4yG#oN>&Wy%JI8$=BsfAbtWC&_x_^8C>y@FB>{`Of%uIt_yVd^$u263%E^ze}3 zVe{Hht{b{cQ&F}@`C#99do~|zL$rxQW9{@3vcgIbsuv^x9ymjuJ^juT+X>e6QH0e3 z5)%@Wvg4N^KoO%Rv?1_1C?>KyEksnDJWH@or~p@MYaM|;zNg%A3o@tYjXIqgKg$X43?*$AxQqbS4fWre$;b`=(#F`wV(qsR18fv!r zjEL9+tV14(`6P{qG^!J!<#0cZs2cn*&xPM?_=}}=8nLbLEV)iPV-T&`G8w8)N~Scgu$7n${v0}cA`U>?ny4*1dC;aX_RH;IHS&{ zrQw`~<|2!23kp-H6@Fra@y-n`63irs=*KbP(wu?WbT=zz!Uh+2?qG#R!Xc0AGdHLp zB^g9VUyzOI=#@rxZp~_yaKcvr5NJ(|-&ybMVae>FhDve4$Dvld{asExPfol@ZZMw< zuZRn;feX+4GZ)}9S7nJ@G5ArU@bWCS=&MxHY-0=%Usble`S~3_ThXN~0=K(IBEXP! z{6@d>VL=cLW;i*vD6G_nYk}!7fT5`z3OGO;MhhP!tbf3y03{0TZn0gofx(M91rovTMMF!I_K0r~_CqtewJPa=W%r!W?Utnw~hGmtN*z0zD$-5ZrFSemMwGuFDVCE7n1%2o8btH$}l>n+m zFIu6ZkON?aT-;TgY8ZbKaYM08;!JD6>dKBWEUaFTTE5{p+prwZ?Q8(`8Dw@rX?$#oSp0)N9vQ{G^Zc|2{eK!L~dUMiuNi@jleh!O7 z$H|iIT*sIoF(k-nDtPQj$4QFqT*X1bZCuD`Vt8!zsGxxXM}=|dTx!eS&~>MRhK=Hx z6QQq1EEibr=^*6HsuJIJ9*sgDfZ)2YmArDNurp)F9KbvXGu2_sNfK3AXweHS6~$)q zW4`rsrlX6p(3V+f(R`$%zKP<=Mmn?5Mp$U+Fs7rYh|H|%zOa(`pkS;g>5T~I!6)mY zq0?d1Xw71kMDmD%Qkj1uZtO~8O?>*kacvYW&8K-Bepl#ptjyTRH@xyx=zKhrshuNY z=cCkoTbin!V`Aqs-F#~~ULjB1$XD?p=xMdM_r(=k_5cxzRx=X-It+b-z+f678WR)sP%0kHthCs3nI1i?Pn#JtlOrF^ zluP5(ofQ-pj>H!nb z=Kh_BPcV0cf^lSn(($soE!~;awCtB;>5!aMd{!b0H|5w#A;~XE$(jVExU57HZc0m6 zrSwHf$?AmZh?Bzl;H>&2fZK{|qfP(72p0dh7+&&T?C+|4h!d^LFs=%43Tr{e&Tt~& zSz(v6CilUrWu0t`4!NO4CJa%zFn9-#YGSSNo~k@`Dm!VU!_3m@=E-K+n#eqLh3;mh zL(J0g=E*D*G-Et|A#=^%V1(DP!-sk(W%4;%ubSDa;4LF8E=-~Bp5O}sc2Uc z5YKhj^hY+f^AOLA*Mpikb=ZB7&vkQPw%U)qrfxPB!tZC!HCHlCXQKPP+#J?lwtSem zbuXCEATC9%3EU&ZdbVD&mu$6Hy^oivmJPo!D5E3NVY$!ks#+bXY+5S23@R3XlY6_Y 
zK<<6XZahjAJWS0iF%~N|7OOBG&>Vyn8ibV^gcZeOmcV1Sz+)ceGDGAt(@-3Y(*Hcl zr-)~)fN4NHHw2^!$=j7S*66dTTc5eVqab~Z8qe}wOZXIDF97C(4FDFnu2{5#ODl@ir+W3*>adLwGOf3z%ctO|~>cFxutwn09I?^pr+U;%?SE*PJ8WUs_eQS&;D-=o^eAxAp1n>m;i9G36r+hP>==4 z3ZCT2<`D(K1)lwMa_Cl`$Pr`5d#*!EtS^wJS*~?($&;!`1+Z|bLv33+)xc&nI+NA) zCh>Ve*{c#!(M)6X61T%f-f+=OPZ-AO1&ey3p&v$m#6m?q@z4+Vt0m}rh+iWNQfMFEqd}-+^CvKp!-L9yEa; znxQt#p*PIoHukZ)CP2C-s6ie$*J9>oe??xo6+JfDb``8c#D{z;9S$-=#eJt*B-Rw1 zYK9UuhZZ%56Wzy(7zc?M2SkjEQgBIAa9LAuO(?jaE4b((MT}!mU>lQVBh#r!Umh9~ zLZU*M%==#x>vkDdQi(<>*aIt%fZ|<_ak^7=gbBgkv5eAu{U#@oU(ubW+u$^!-Jd4( z=!~>GfxJ6`y*t7DGY9-<4%*M0ND9AX3cq;@zih=f8iltu;-5M66z_Z`MUe-%?tu_< z;HX~Mtb00c!trSOCr{42pL022;n(xkCtNL`AP~a>Epb8lJAY|owDcG`BSnrOM~-1f zjxnQWfTL%ip=U&D*d%M%%xlc zg>x(`RH52E?9+z%fmt@`*ZIw)CtGH61Z5|VHz#3fn{srkZC{BL-(0CuMXYFacs3^s zs@XUV^Qt230bqGB0mnx5#tXWgs|f>n9>q=HL{uKTK2qizss+E&1_KGms%J(sJ$&UyPkQLYmZf`*ycyvB2ee)b|A)RQR0u@f6% zmpFWagOvzO8hjAjFc?Yzf)6#+{|JcmyzW0%__&#WSNI&5R~jF3p73u7VIUkFf0y_i zn4xb!nznT)#cQeQg?CEm2AN??V|TIS+7P+ko9IfkTS^)}d%B^d@~zlmg;px(>{A@; z7X@>fgG?Kw6a)qr9B~Z7J&b-=+D_UMUtSGp7UP}A)=s{_x)xFTHehBND%C?iw1&6123L5l7ybQuUhJHD483&;RvI5V0OnCoHs*V z9SVFzg$lL>#;1-0V!(xkjL_8Oc=5UUEu@J(k(%YZsg`!>fSFqM4gZm@hYUECAZ#}f z6pm9_0}&6bzPjNNc3I7o1)`f59Q+}dqZj@U?fzgtOe35w^Dwm9Ow8R&Qvl8XH6+0EjJiN-VSL-EVC?oDq+fRpN`<31FvHJ7wsBh_)fV>+lC(-~ zj|iRStQi3hWHyOrO2+S2k?>oe1p9xQ83eOse69egY3McpE(|-$uLr6y4FXve4n-6R zX$R|IayeTQ_zFSs7+KqW=*`1hd5m9+N})U?K1b2afretsdh$Kht#ld60Y7 z94&Ut7|)+!$FPHJoTsAOYUo_MqxXxTw?E^GQ4U4eKm~GgsnHQzscuj+KP^Olj7nYy z-e9lb8df#6xL^r-ca=C?648NJNQNQ5(;4nc$&#!5PnGK8F~_C(;zM@sI$>i zg)`becMEBCNQ2r{NJ~SyZ-L{=m((b7<}f6h7#P?n$u_U@70ornvKZn^?uy3DFE-+T z3oXZM*$ov6uf{+GRw#pmWko;E?aVsU#@R|LCBl1{gRZ*OhoLY6C)<7^Hv zDKk(@JJf_fd2^at^T_;Atd;J0u>uZb65q{#ar+4NV;k7SdJzmbvTF^t6+U$m|Dy9X4~{uFS;ozQ9u6d7`5s!Jkq|crcY2IuTOUw(h@~#m4c`fT# zI@Kf>T^K>J#4EcQPEJ2Km&0jac|kl$RUvShYw%zu4SC5B1R7?axJm$D#b2JM=@#n4 zaolOW>cK&sxPPc&G4U8dc!sc0ku$r$`S2aYZENk7Pk+kLp_N6a<;{?_44s^vR(qNE2Q%TBB{PC 
zr)W{8k|HN*QKrjf+TU)Hlxe7nOA-?*`I-OiL5H)5o3sNDIzCav0%$qC&p%yaY(34Y zbPFBlsxl>ohRuZ~>`|~l@yC~fFxX)ee-NH;LfJ$L^*pAhf*z3YkR6KM(Ex8@$l74u zz9d!z(#us)yje~8njpxen>`g6sxw8?qgw@Vjh$xudI7+oz{^pUqYBLlJ#2yfIEFtP z;ZU8oJX-I){XoD94Z)D2UkH6SJfSMx11Ihh*(&!*-BovJJ^~vBfy{X|i#}4{>t@Ss z9YS&>pN1M=>Y9mV2XW+colSc)f-Ce9M#NW6ClFl!Tw!giK$1Sl`~A5Wt?H@WUl+t* zV+2qaPRbJlEw=-vQro+`_J;FV69ySrh~XyoSDb_w6@c@^WD&daN@_HVRL-u-%wq&%iBkML97m`EfV5(MM6XYp)Ts*# zM5J7VQd^J(6ps!VPfmz=ojAcnBbPanGmdkHP#>^;30)kI%n~BO^Ni|*^oD5+g6k(> z!|`LWF<1nH)hP;BRyMNbMp(5xtr(R%EJN|DkZOzWX%}V3l2m*2?*9IKQ!P`wi~-7XeWMN zjlhG)Qr70N8n*p-ZH3X+R>+J(twy`=}Z!2WWF;0j@n zbW|S+f`!l5YkMGE`7}F6AH%{+bqR}fs+uX4$(2o$Ww#y$Rzl zuP1Ic!VXd4yA511=B`akwR1tK`U!2ki&Br&W-Gv(3NyWpso5~*@Dz?_50*R}^k&G% z5;W?kV$E*D(&aTzA@E=*nYW+fftfdRy3=8ai17v)hsAw$kQZqJB~=0?y6m?SnQu^J z$~+k|im5V+X)-k?O#Oti-?nR5(&W@l9Q~3j*xA#zLN@BqdK$>@>b?2fu^AD97P}kE zHTB9Pw$&bJ1;k1ekzLWacLX=A3^{3XOdwEU4WuhYtU0E<{E=Xb1UzQLGR#!QW|e>< zHnAdMCrU80kub9_M=zoRH?h!qnbEPb6&ZOxG4O1sZDN7m*02{*2h zz>SRHKo8(RpXjTh2sc(l8Z?UIKu2?=0db_s{$i|%IJgzjSCg8f;L06UNWR5kSrG&} zGj^}DO?G^7&EYL*9xf=XFb+^_1?~>=OvPy3>d=IHHFp3cpMNs#GNM)~V@_NP#Zodp zHqzP}Fd(`pDRC`eAk)Z=vcN*hO$5x+`?F}c!`B1u1%B8D5a(apUwUmI+uVuA@`9q_ zz~dWdTuF_Da}zgs0%nO!ID( z5)3jgZlE)W-_meVr;Cy>EvnXJ|n&Zj~krzemFC7<%g zn@+41qwMQ4-gxZ7fR<*vr-k&|o^MAnAk*EFQoSaaE1cK!E zQ}}lux3{1y{_lS4V`e(<9nZUs#p`aJ{I54RGjH#HexCS_&2Q_SFV~09yL$2?^dCUA z>UI6i-rUV^iQv`ee!uhN{o!-_i$jmSd_o!@ZYZ0t-^15yVdka_A`z7R*YhAgp3IlV z;`g`Svz+IBOuo@xgHsFfI-W?5x3iP!R~S@2mbU_*$H9efTVYl5>+hJZWM>_36LuaQ z=gIsXZ+AO;uibB)Zu)+!uY+$5V;^An?UfS$?ed}bqW2QgSgte5@X^$r(|jI-w%{R! 
zVBUJ=5Ti|z=gq-&AQvJ)I%;9UEcsc6o?pfak-K!c%jYthSxmPwjs9yyU1)(X*Bhdq z??cJ>Mq_|H@YjeS2v&+0pp=c=r>QLNH-?U@LeJ=5itD@L?Zh1ikU{$!)F9~o4M&Qb zY!@aEOidk|9T%Psp$q0+ZQIA)3$u-zF!=X#qiSTZ>DMI%Pa4}!>-75fR~+JG*oj}Z z^}a`M*IR_OGw8?cBV6;yn6bC)(wnWlABC=RWUKtVd`Kyt@?{`f@?bm064S9C7hliu z;ej6^<#s$h*gbgOetq1(=g&myyA_OiYb+*sn^YNUH7DYy)1SyZ#i5FHm{aHuEFU;{ zm3G69$#W~uy2VsHH-9yFpj#$|i!$Wz3528to{%#|vNNY8xe(0E7N%o+gblC@6uiaV z8rg(Z?Ejeq%g;_j#cRqj;`uZEd#vMQ_pgQ70auOSUTh8EYSu$61T=Nw{!ObQ99;Ux z{6_qfbcy{}<_seL7z1C1|hyiYw@BU7h^MPkHSpmHuAG^lr}Z{;^ig3H~ceV0tHKcrX4( z^qJ*-UwzcCrMp=^_L)pN6ZR=yySu`6G!b?t%a0NfTZdGEz%l@$Bwy5ENwDQma4@bk z2^MHfwLy=OW7wrPH@n6^w(J4YKtp7DW#osC=!K?t6DmLlfIG$iBC~#!Z($qNx`{;Y zyBkg5Xmt1XmE%&l$rjx%30Bm+Fz-O-r-F+bc0`!5B?bNj!)mMm8`pZXa=z{9}|%^<;?`qEGb(?*@ZEUBQ+AsI?SeM^x<`S;Q9T;?2Aj+qw2i>mf!rTYnKy+`*MkIuiFoje9 zez7q|Ty%at2weTO;^q)9;^AF^+rS6DXjXJgfd#Fk#8ahS$jol$VDW_>rw*f#=>cFoHGev2xP%}A-jOVS#CYsBQz`^w)HZd?gJBE5bzLob5E}&ZsiC%P19|~ z->!#je$@e$s|aLfdN~6(D2lutY=rfjh7E-=EzTvHp!FgFQQv5r4Zq*8DqcCE;>K`t zJV5VB2@==u5O#)n=~FJx?-O8Bu&3RRy{d?hD*`jNA&1=*-%*Q#@W2`dnPs0qbAi}~ z((6pRMEy9F2WH-24c8t7apVV?qng$-zjk2DGg|V9n(~M;WN%sVh#KTo!81>ss5>Y{?#EHQ)!cmv&Gx_Nc!GWGFC^QwK3w^S1e%^S6cS&xS-rXoVBf|BHu zl(>qB-3f@@Q5TBG2qyVoMC);}JMzJWhK1Au!okQ-Nc}?+fJ3AFbq5HWVHS)wI5%|1 z*ZFW<=+y9M5^54Va~htXi6!wt`gsz98N3AqHnW^Fz2c-Q7m!x&=7e=1$(Xy3S4^~6 z94_2O9jo;;KLKNiggk@kO0vv%6>D%_f58L1Vz+qf8lD zj$AfTSp(Jk^p$$M1FRT{Wckv{)lG+)oIKO3uJjBmEgunVyStceMO`mEO?eam1-Crq zD+ZuXdbf>Q2Qi({B>=D2qbl=!jMk%Y!{rXDEabE&O}=)05|Kng&C+BimKKe?g&jfM zLkuRrwJ^=)(bFv#S|ju%;II{h)LA;&L_V#>+cg72EjcMMy`x;qm%l;}+M46CG&aJ@J;n3H}*r1PyA#I}GV#?v?|1yKf$4n}B1Zg%Aq z{)P;kd}-mFVLW`IsY88++tYFIvM$>@wpt4m6bE_$^Ojt%RiNIxisN&(B=?Yg%ITc# zql}s7>#AI~ZjuoqL2fU!da_QnlEp3D2h7s`aA;C?Y_PNNQ-Jyix5mc@4iB z-jtEo&u8$v$5Awfn?u=&=vQEJp#jnBycb^3bp?g_LNl^f`kIbW! 
zWUQtrqFYv|FO<2A*ET_9dCI5wgQ&XL)>L0QFjUqCsDc+9V@>+7J(6c!*VlR5`Dau6 zfP4hIl3R&jY6l%X?9GeWB%Mq1fSD4}5D?LthN$9C$ITw=eAAo{?$8lgs1Z>U2q~x# zQGvnw(ZEE31I@q#_eh}9(V)_S!N#be(!rt9xdU?_U<2(2Fw|m=dqDk3O}sile+2c2 zT&^KYAvuQY%@Ov51aLl;Ip$&`1b_$i#o`AqY}25jaw@pip0LO>p>MdnnvD%p)uU zDobtc!rN;Eq_GU%Iv;>G8>*q4=gyJimS!q$nQRoJ&%$yf2z07^-*(nitBBG?Zb(z^K;_0F1-`4nDiDTnDZP&;ivd>8Al zM##GVB&0rK{q!0B`fL?1wUQSj-(p^B5BunQCsE*VlUX`S5hW^=zE|a@`UrZeMc4cM zDqU{nV@;dMu)TKYZm701YiOj99agt0_Uzucp#MeW!_>d`DqM*Gr>CPu2F*^GZnM05 z{%MtlS8Rl&)EIkgMC%~d&UF;v(fmoCtSm0Jey_Sg2BEg&T1Jbyb5*u5gOd{mvtl9V zNUr0IF|B%k0q^T@u(_LeVMYxJ`OG6qb9aKg3(BF{_N14d7&-V1+Voha58k#8{i=U;S3jhd9NK7terT|KxLQ{R^6jEc~gib&PByAvpNqadE?r48d z;%U4~dlatTxCjsiEi1H@L2eXH4N?yTmWz)>H9eIwFbvEsqx>Rh{&%u~xoMEPX%J^6 zm{BR1(OmF7RhUtsKXFM~21;6n0Q6KfS_T+e2De#|YxJ2%vF{^&@q;c>(Hx2<&Oy#z z;#N@-SCvjW3aG3J3)gq57cNI5@Aj5kS)*BuApwm0lV^-4MsH>BMa zZmPyhS*u4QJGgdNKLus7{Mi829-g}vxIGEzs5gsC8(2F@T-a>$%kk)*kDo4web zXG(oS)Jc^W)#4aTthz+_=#ZbSX>`euw~W|cLByzd_~;-j6M?7+U)lc@xkil+Dx2^X z-&qK<*%SEqeey|TRslJI8ZRjC887Qn``*B++QdTETcj8w{V7;_8=EegSO@!m5pjn~ zZ@=Ih+`bAZM)YO4R8}e>$8swHS_1SbTUJO^^osjm8a$~nFePOX4GHkH6x5?}##qaO z=Sm`U6h0-+q&R?uLHt!sVnZumj4nQV@dy-rrLZz~6$9 z_X#S_(hTV@fE)xvX8C+1m~eNNlMKWqRV)cHq4efpjy|yk>S zE_!?Z+*ba4Ow){XDE`^HvEZ{FYDMrDPeHW?8e5xL^6;^9i;AZ0hoJOg{I7_+(hH@^ zBdU7c)86FAqg*o{S$HE{IaPJ1tmzAtqqGE0>7Mc<-}%5on6SPVMt#_@|>3B4& zO&krnd4M?*PG_RPPlw44*XdI&ybn(BeHyt-N(>%Y=AM+{_3Fz?p51BG2<_B**ZCk;0e_%l#6L zHHtksT)o*vS=}z%*LV_j;e662=Z(9IXFrXFG`ixU7nr2=20*vNguH~h-;9D<71dS9 zKf}(kQfh_bUIK>L$+}7BzcUFPU$0+-1|D+sl<{hQNfygxQd| z=tTO4I1LV%HUzZLwKLwlLD=PdDcHD6J3wCHLkTii$BQ9tJpXGG?bhE;z#1n+3dGnW zmfd(hh5vp>vo=ojJ+z6bX;u#(Y<^Qk;YYH!(jxtopPPoEbic`#h@h?{7!eUCmFKHR zWasr{6#5=QHpyMg52tpsu4k2M4wPRKqQUiPTm3#IFeT<3DFbk<=A2nY5uxRTXvzgy7U5og!O{xp7GSwRe$ko;)?lmHWsYV+!Cm z8lMCw)-Wafvw4Tms&^b{1Sc+GKUK2t7trSprlTIz43tc<9&l#=*(+PazR|db^+XO~ z^?31TW^5F`V|%rtyyf|HYvjfICXy^6xUG;$3;@ATq&ql}6eV>1RsfPIedF|Vb5ZIA zmbkMSW|soD&98$1EGM)PRP5j^0cdZSfGu`%CPU7SS(BB~~6AnX@!)`shC>_le 
zmME5GUV42uC{=(z{?7(ZnWA~hbjfvz7(s&=@4xQ?odPjuq*hbZ#-%g%ZPrBRqbL;$ zCJiN)f?V5y(m5#v|Hak{9nTFg^k_mwpzv&+9^Y|&R=q*sNBo_=* z|90`&c0RqGA0C{LI}HzD{`r7#WV_{(nHoGuXygGRaO45Oz~J5*FzCO#3jV>45&^-E zm~#i%B)Ah$TTg`I_Hf8z&LGg9=X)-aOO==9D#btDYsEE0VjLGPc4K;ozjAa()8vb1 z3czHyMxdw!0;_ccWd#2YVHmn98M*+N257L=l7B)V&=UaYcYoj?pI|ZN83=oue%XCB zH332`%x^e!Qkd)wb-P-5bHxmR(3cF&-0?tLu-sBCxaF)hu2c!2>=h~qRS@h|j7oG` z$CAaOq!5_xgAd--pRCRkz~89c2D^k6Ke!Wv=7|rVL-8<`v9dO0iP@Me7ey(-j;XU| z6{CqokeH|-BCv?j6iX_n%iuM(r@x+Rc^o*%37G0#ezo;zwbmpQ)FdRb3;;6?0IL@0 zu?he){2dQ21V>H;NA3cFE(b>r21n-7BAmGv8;PCf@mwsLqNB5yTBSUIRw&Xqtf-eM zS8!+SEuogM5}OB0>BZ%p1P^3(DZkSgsJ~vey9ZbMG7Ab>k-JE{TFe@&C>G9YoM+eg z#MA>0bVTg}W;q?!A*POGvSC{_nvKN2SI^ghZ&j?<0e(zjHlJ!@79Eks3c_rylcrf< z@Fzj`5i>H2?i$5bH=CzO)if42n}6sT4fIw}2~Ygog(M7(ZxLA2A|l=7cc(QKFQNe0 z!=n|!4=L}yFAK=G0=qwNBGpLJ8mARdiT1aWl>Ur}MYxiSf)u@EeogxR;Z#w>b%RAMAe>eJ1H` z9x-Wx_3q=vTF!`au{tkn#2d^crL;(0aKj^0$*}P!gQ^7zSyu#X@iN!?-v_VjPk0%D z0JQclJbX=b?A|=VmzEVl%bnij0%NNqV>w!X@9{xF+baq6;82pl4=PFgeH`gX^emy{1+Dr) zvRUY++bQN`g%V3{R*f({h#FT4{Q3( zpV-(LIQ|R${m0FZkp3U^_uF+)#MIf?$j)Rqzg^h!d zk%NVuj**R>lZ}v*nS+jnosE@^>BIGqUh;!FS{e)5nSVI1GJG^8=xqGkIg^cvm5%eD zLVyo1$$x~jjGQcV984@sj2s^&g$*4fOfAhVT>jKkb}_Y8BV=Kw<78oFW@KXhSE~=q zX3F%@`ycf9-)QDvDEiwg^B*$DNXW#@$?&`3e@PwFZ{qpEEB{YY=LzkuJ(qUCd;jz5 zTmO=&cKmsg&2X}*N1Uxk=@HqEjUKhvJG=H7wq~A{! zLKQVtD5eO_hY%krmznYzG}w`Pz}3>=V&qTu-OHGK;}uO>>%fE`3-Jo%FQ7okuF?$# z?ypDR<6RH&-Q03|!Ck)o$)5L)6Cs2qJ+(Nw+Mn~xBkD26;CJnGkf~}F&ISA08Nki{rYn~+vstnU3-8gg zQaF_nMOKhgx$fOsKVhe0q7@-6eP|CuXs&SajUBLaD9kw%ZJw zatt(spmyitlTSMMI0Xa^r%0}JH5@i%&xQ}*r9Ttc>dJ8Sa)y|bbfX3hm}`M;C@)Scs*5` zM$hBma@(f4Sg!9C?3eId7!*RT71=Pn^dFABHh&6#{t1fFHv zbVC$t4lKQ!s9E?dzLn)Fi>C{>q~|*$5c(mPMXCbY1$>iwP&2W7O8Y4Pcvu^Hh*FV| z$K80uSYx}7t8Qh%NMy(5?2U>SHQ=lxZGCx~r}45Gw}@#w;EPX5t9iR8!K%oohfVu0 z+EV_k9WT96j$>M*n%{k1<9-cc4z4>^mXk!{7a$#-^P-dN z{=kMOlC0TT-a~q2J4aP1aRE}1DBrjXcP;d@XUnQOfv`bQA~YDnV3~ zDaIHUBE$Mc_6DM=o2L}igAGVtpi*L5V-S9g^jKpzQ9Tg|43d9WhGtOxOmGj;j5tQHw6sRGsfKO15eR! 
zb0O_cmYQ*Jn4rE`JSZF|daX#dZ+MO~*?4cu+w~@^|%BGp;Yz)Z(67*jkE6H53ts``R6i&(MYTl0Q!Hdp>r} z;-&B{omIi9TRJBKAPRlDSzA9H)J$}`-z_oYWl0p&6r{bXNylJA5)Z=qY3k~&QR_d= z;jw4(a)j5`M&3YK&hGj*!<*QH`>Gl*MovqMbL-Br-N-s6$f;`!o17WNcD1dN#0=N&B@`o|g^jM0l-t1}+UFS*n~rfn zKd+4!JX4o1aR;i8hA^9`!3aqu;jDgcX>XqmN>2~v-Cf&y@!4jbs~CI5z~l0#l94E) z0Y9q!p(j}vk0-z(i*nys=4==Ik>+E+N!92|1?QI#ZN$6AJnYo z0FzO5f3kjly~wGe%**tG>Io_XSpJH$V~w;^UXJuD8&o6yBGI7kxwcgP&PCsMQCg$L z)r8qGriE*fJ}c|v4?j1PjgRYr?x(bIy>6?IbL95i2z|@ZL^Im&KUYRsly)^boTi4; zr}0>Sf$d7-bIe#v^hua`vqoWW`D;})awPiEac))FpIxaQ-$)g4R-r8_N;|tnA5ehlK2{$lVCJ+0jM!5}T;2ESaY& zQo-U06(E?iO*bmKG(&@;@?x@$-z0JxI|0hgoQSdZhZ0B5h0zi?Nq5(--aI?)S{fSa zR@F6gg(GC@nq%}QXO~~746t}X zO7Qm@{iU!LZo`WPY)z(|SH|9_Vm#5ToEcv1G6|rZg*d4;@0pC`il1*iuRQNcwv`#R z8)4pjVjeb}Yg{s*4UmpffwF@BWDwRfYIO&CC5%cn#%G-o2pza%5!X5S)>*PxT9`OQ zGG;$sEz^k3`rDl=O}oXzBnqRtJ+FB&npo?ifq-z(G~>nO+Im5MZLP|VO@+T-89ft* z?#50{P2c*dYbLN9{FF>=MLVym&E1hje}RY7T(&3{-@o=d=(e_R?k886|GtKvpIMQP zqP8oc(8qnF$t`xgWUsQhMbf?RI08DvTgOKBRPS^q#avawPW`Qrhl6P@ ziYp~o(Eb6L@*r^13SViiK})zO*1y`xfms9P9&J69mrY;Mj8we?bjYu&DN7raY`IoH zw~(iM3w?SKBFagq>EsYoOCO@D+H`apuKPVD=&Pt1g%B66(A4dU^)_xqt3a-bf%LfD zmtGyD6mGa7_&3Hw@EIaQxigs8S}ux}!sX&E_WYIt$2P9f_@%V0HjED_)DO;I)T|Yg zq4{XUtqr9;`L8`y+g>TBQo%O%PhM4|N1Z-Td=|vT3tM0JTuzMqt4_{dQAmkilP?xU3XN98_+PjOWyQb4$3JpjaaKPN^4Bv zIGhsVK=}%+jNX8q++63%7Zxq<4Y|yjYirqEYL58GMG zlXp0wMR`}Hndx!O&N`NuU&fUWhKj4(>EF;JmM~A}2ya{BS!=QFY^QGjE*oNkvpE4S z7Dhz{Y+rl1@b-Ee?SihYerg8c*gmgeBeWn<`jAM=jjfjnlNgNW)%VOKvc6eo>h?=9 z*`dJRgV7bB0ZPN0Ikdwt$~xte(aU5etpK2v4MPhv*>+`0nS zIA3G zc657AUdu^H>p|1)_O~m4NlQxZ?)JmA_#F4|m}BR#7_P6sCGHqvkB0b+n>g$X!w#QYwhW`?8d`Z=rtDJ7YLSrTdQHiA@%e4e>L zzkcWX<#SQVs98wQ?`UoXBojdtgwXQXu?%}wA(4?LC%bahTp^~>1G{@#yRYlbYSwkK zqc+<6{YSi@9~Q~2fMpxC@-o_bPJ#BO+x{Z9j$zapUDPZC{3AG|FWAQj{UJyf&+@F{ z9c`0~vOTzgEiY@nX1r@Lg^$nm*9V{7$lc3fbCC_7ZOS%pf8ZdBu`(eZE9>d&tPZs z2lyzPiy9IR3=fR#uOY}JhHz?XZWCI`W`&FPqKc}T9+>E2MzNxLEzsZ1haAv*ODS$; z*x#+ennaX>94GSf$6-Uv`M_(Ms|Wm>cK%QD%z{fi-X 
ztlN5`R=`^^wQ}EGpXC%jdFXz0ZRQZ4X=ivTq}8bk|H5VHNJjvDs78O#bH)MP@PVG! zqD0pu-WD4(su8gsx5Lkag2dv>+2kpt>3-fS-!CHWqh~Z#71zRm7C?Lfqeo|CJ~wMf zlXFw+Z|T3~mX|hexXb8xckhf9#o_L4T3r!1lNyYQ?ib=gqZGe|c91V|m+9%}AnvK} z1TW3{^`+$ZE{Xb1&ouO^xWliPz+)U3#PZnEs%8_for!O~BV53>NYs~v6eA9dSEFs= zyXo2yiL?y^@!Ss{izQXqBrQ9MRT&fEwLfr=zpQid4h;G><$pv4sl#y%N@2~Zk~X(a z8VHq`?c6h=p1!od!sQoP>WV&Jc^-d5glRMmKrI?#O|#67h>3hv!OJ9{uUNJY(8{X( zcnXhyo=Hkm&Pwp9=u|Rd%%Z__!3Anz;}`@h+qRQfE}q7wNAI3gofX(}-NIHSNF5)v zCvcVOEqIV#qb@T^H-C#Z#`JOr5co31&BUEtlR5a<-(o~ge1{mF;ap7x#P@L97N|6= zF;ex`{>{RW+7{a4tqMx{ccs9jl1Zvepuc(|!3pS}w@lWTC5$-^E}aPsloDUV>8l|!=w58T z{$%5lfwj9KKKiBANA&|i?(ZOd-_y}Z4Qdv1s0lav{DfbZK{}sQV5t|bZYs)cDr<6U z<~M_&(9sv66shv8nF)b}1uD2W2}z!G^NMa%mA;$SnhYXbzoyP82l$Ko*$#BfP;@HC zuG6pdQMF`d^UT%Ha=PR{EsDY7m@N8KX{=fSoShN29w@Pp5{3@>Man0qs~fl9Q9u=B z%_V{MIQg|jV@9-FMcVThvO`aKIaNEoWs zVi)(?yGuKX@rsYCF!prUmJ!ln&>j+EuxF zJM?X~M-eY&2E_H8SG8FqD&XQ|%g=1`)WV!v&OWla<2{b9Tgw+|CkrL$HNQXK>RQf%zpWM)+U$Jc@0&5SZ4*QD^b76mNFtg0 znAhH<5ogY3{xS_#k90jI53xVDjGs9S+cd3tXT}nWBh_hzzF*X-@N#l3RWYMDHIl6_ zg_ks4vFh7n;fQbk;J$D{IsDY=kb1=O!L=9i%5tQq%?^QaHFJgO^jzYpKbrdGA&E@-Ck zFNRyodkZVudt!Wvc({!Y5TiI@3|7uH$#L&J<1WpxP+9 zVpCtfK2+TP>8zuP>Uh+3+SW$L{aLO~hA_0b$)JdJ*~=JgD`4I{`b$Uo&pfJ-#$MV z;+|voJGjs4>(X8IR&eaU>?v*eWd!_^w#@3~^RSgN0Y2_9IWNuVHX;}Lwqmyh1X2Zo zYSPBS7qRzz1*_6*WS|3d#UhJtX;Ou2(VjPvKSn*lG=xa@IRa=cfrK(#T~Y1c`3pe;l!)^}KUu zER@1Z8Q#nj^1b3!8YQypgS<=xt1`K7KYoOg+6AAgIL=3dv{4n$% zIK}z?uk3ek(Q=Zjl<<5`lErE}dTyXKKny(aD+Pqm2dtbzy2TVT%uA((6v>(XKS1~y zX)Pu0@cI$HXD{!g5Jq0PEWzKB=x4P4oZQpBV8{|6`C;s>+H`hqs%ffsa5#Fu(|e>` z+!k0L(Ek9B{Rd~g$I<9g?H&v{?mlpQmAW&UA5o3C!;B3_%4RTMu*y_zZPbI7jf z$O-A`Krz(Gd`G-k4j1MoDr;v*3Jx|# zKGoH$^g7ON&kMnbU|r%BiS|Pfs@YuLipdF@Oe#`#*srL)h~Ot|Ig4|aFkxYy7#qhXByB(5!u=VF zPCR#E;Iomtv2*o4ZO6y`&8D*&dva5%?CGg9`%?1@sjTw$0sf1a>E+R}aKGF2WH!6@ z9_5xp&V-AFL=@8W_KS0F=5P9NCRtEC4t7dw(n-_}rR-?ur}(k+NvU@IWb5S@8>w}5 zfC=Y%G4UXqts!rJy`k?ErOZl#vc!c|?K7r|Z(`wv4+3qR+53R$TR0wQ=*lZzfo)0p zsQI|}dp?%_npu06H{Q0j&~@HCDD_Op8As4!@G6u)l0J&w#f=fgtFjDdz3+Li{P`A? 
zCbMm{C|Ow?0L6N;G4G>W2W-klL(6H=D$wr@7jWIb-@X+$j$3qxvI>{8twmmcaohk7o*nO36JVWt)jl{oYRO$ zewyG-x3H-9E0-ivWd{rko2R1X)I8PA}&Z-Dh7c ze@2YAK4ao$(V4X-w9w=^S)B>UxC=%18Xq-}(7#@Ch$q4DT@fF-ARkO1rj!&fb@sItxKJk=9 zAkm7)e_((;2yTx*3fh*qax|MNif8=w(9EWyT3=+UJ(*#Pe`DWQ~$T^FS)ODNjM7#^5zmtjsSeddK)7g1Wk)UiAB% z-sG^s{4w3{w$$jvBAeM4IU~wDC6ydSKN1puj3B_Y21S5=W1F3R$wng#(Pmav)3akr zOK-r>OI#QCce-X(Dy#Uku))>R+znq!Jy&-@ck!E$ov_9{xGNy_*?xc57!)BfQP?w& zl^~`bnPu1R4Q`jaJ+xayuN|BRw8@&@1iQ~NWOJWM+|X^l4DV59btez0t!=|7=6Qx! zHco}X*~iK>8@!qDk+avYZpsU6Jv_adP`0?K9mTW2~@NHCUC;& z&|Z$NUb8oyF|J3jz{}#f^g0b?*=>T_j_3>QnER5$qSnV|j}zt1-=r8}=?kqv$;&P- zxX?ek^rH&T-je=aB3+f33%EaNuLw|TR`BVRY#S2%Kqs~wfsTr5$ERltFf~y-jDL@+ zt*F_*k*$2+Lsi$aT5o1*9i6l1RVSxzCb;bAn9-D{?2qq@vFtUe3eynI_RVB;LnVjl z@LS_UjX{7Q#a$_yzvu8!wiq(Qbp~;CKCtejg5jDeuxfY4i_#X_#UCFeKtk@X{K|m( z2NcpxvdZuqjsv1nIb1xHA(}Y7vb{X>X~|^FUnICEntsSxJAmStrnMENz$>}3innpm=0(i8?OXljvX ze0&bzOM|&^l7G{7CCP3rH(2?5kJy&Z_2%wJ)?7>92FCBb{iw#%^XRa^)?8Fr5;P$Z zV)7%r&-s!=MsezG{dt14%#=GdT)|VD-3S;QRpGu)UJv(M>h?NZyzCE$#RuZ)oNU^* zy3-2EKq9M!-O8%Lbp_kA?TMz};xs(YBm4LZh@QW`Gd&#UhE1W5?H*5dZU*xbk}^udt$0EerGR z7PZUsZfblX2ad&fQptAep#GWc zqpy+4KssR6@U5|$?G#9J#Ei-OL86le=DqRzgEuD$*$RG|SKjNPWrCo}7P&K~9W7Ux zyTOd93>4@4mz$JPdd{gE;%R6VirxX|BQ0)=EZ`qc?6;%5vhT6)+FvlT@zC|9@8rK~ z7Q*!AegKVLR!#NuWQvOs)Q*dJHOnf~w#eB4C;3{mZ3dO53KY$*)oK`u_f9E{xHGxL z+G!erCCJ2vlHBnv!GxIeQi+kCdR=I5Kuqs#MM@hUUNu_M>fRg}rRVa@UGy~4o5~Uu zj(+NC%oyMD0SmC2bbY^%I`Gj`(#Bkgq(%OnttnT<4Z46B;MuU!ns0LbL{4V>1-<29 z|7Dc~cV^^6Ry|O{JE$HAt*G55WP`$kGY5ka(%2!K%OI^uaj0PNcMNz)r|MOnUSFql zIMUmk$QIxD%2X$0zOYNr=OK?a^R+EWeM4bHL4~}gJ-Nd$rLFOR2ji?ar{!imD4WvO zdLS(@!R@7{Ka*vvq;^hCWPN{HB+e~RX;`TslJ2$E?|h9iWDX@HO>F5X#RwIGwwqF# z2AFIP*FbRkCCO-n;EMcSOMqA%al$H7%;Tbm$$?(>vN=oXC=#sJ#3J zgdGr}gH<&KyFR_-KjDBbBE^Ln_cC^R!vpFdT>Exx*md3S$&ju2cMj6R9kU2

K>ZR!F(1lu zkDpingVei20_#)a74qJ7#Mr4@eN(gp{nW`lx!QA%kGjwUHNQu?|)Gc6>7u|D0f%$dae=MoJD#yxams0s)Ncfe!iZHckM7vv?k!ru+&O|UzQd)pXZjB3 z>%~Z{!SLt(->FV-f@nU-`Sa^Vz2SQM2FoHm{^a#wIF`M0;Fdg2bG;Cc&6e;RToX5}LR^0o6(rleI(5;Js2bj+YBp$ez3|m;@$C}K6*ZCm*SXSo*jS&4DQe)v1K;(T zGljdyVRW+6W9`I|8nmi;zcUdk;HBQ_yrJljlcu4H#hjRhYEb1&J&FTHWAc+xDWP=| zY>dKu{ID?~nu6dil88O#6~AP}(8+?7{t(}Mhu~X=O~L8_Zdol^0JbFH6w{xKvUrI4dIblq(@*-tD3O4)myj7h4(Zow%bTQA z$140O3@G}~O%ZeA!&@*G`6la#0gD*yTssV#&1a##}779HoRV}AoW_aIv$4!o-ch< zQ&qX1t@+A4Po^8*EB`TcHrm1lM1=B{VhopQzOlY1S_BVo)p}3DgMpYEFYBaM3fxZO zISQFMf_gWdv4_0=mWT`QFrz5`tGYcKmopL614EWeWU}2JeLoCtY%MKa6WH9u*{wY+ zf<+Qt9Yfe%XY0#?ny-CCtqI8Hd_>0c6hP_N*->n|EJW)(2bxwq-I(v8E7p8|JUA$I zyL8Sg1TVh1z$CANWmxE|4C%J<@TB5l$ImiO4pwwy{LJrjJY^M%em9ZZdVhc(fPDQlms!s3^U=Ye1+!4k}8>2Ms}* zJgjs4jn4cZu#Hx0Ms++mak#t0Hu-f>!v(wchBdZ4OiTkvGO`ig&j8$5NwqYQv)_KH zK|CDq{v9>c8&KV*V8i$s+njFKR~6~?Y{S`)ABBjA#(XW<7q!pFT-`Wb0(B9FdumV* znP0@z*3~%Bke*jN$2O>bhzLWg7^*3+K^z+H!bat&@2w0q9*p%M?)aWTew*A(9EOCc zCSx_U`kd!7UE)o^uoXR56W@!AYK>NdDujbr^miYtzu{BVMZ8HN2@|HEtCQY!*1)CQ z%cqm3p7|vaPQIV84z-eBjhg`L48+rm`1aPm<6sTi!y878Vnx6aPA)twxyHc=i^G-C z%g6K$QQG|Qmk%AZ3R2|M$l{#`XP2}z7diy-;nVb?$!bl`IH-2(!Og@da8NM*coaaa zj@fb*Bec{QMh4Mvn0Ft~tAV+o4p2)RGn6Fa*NCG_D`8JhkFVZ5POoc;vLb>%kIe{i z{!Z*32+Au>c9&)c_l27&}`3_U^}shk+xWU$)L_EU9Hzv8_$8#4ZhD}mEkH%3^!f;*Ie zdBh)=b+@i!5G-VEJvJ+Wxcg1V#m$%SH8%EwhzSyha^y?9{3H@wl2LgnL@{SWC<3gt z!+_nH)!55dq;iVvF(q~c-j-f(V7zEEq71*n;iFPxQgq>X&8%_dUgb6w>72;59@PYJh^a+pLf;$m_F696;vS63@vR` zXjR~fZPE*=m#m>N`mo}JF>l{>c)$xk&^$BnMscah4_Y){wWZLtaY!H`B3yW}x3^=W zYnwmn?m2VxQ^UYvgaCz3Z0=bv)l8>dqpd;UJ{r1<*G#3aFokY@{@zc-7kgwoC~ln^ z4mtVu^5zaXVYXquutAuWfd=7FE;b*&BD~KD)XgW*@Sm0thib9i?`-=y6CAdW9*t&@ zvItclMPl`W5XEh8+xmrJ*m)!>TC`i|Q6`idImak3OtH(EJ6fdYiVA^o>$y-E30nQx zS#18?In+4?Tzhn9Zg{MLyg?)y>ba|1r-3)V!PL~!G$k>-i0f$s{0*;-xwgNyJpL>Q zQ9-N^fYo{=fKM2BGWXN}n}&k}RQwnV2QwRtE`|apAZdn+lz9KR+LoeIap=SESV5?m zStRafNSHJ1qm%-jAI+L1;gWvdXN^_9X3W&J^@Xy=v6s``3z4CbDl5Y*-83tCE`fB` 
zZ&+gQCv8P%Dc7Q({rva$)b!M_6np07z>(FV=R>d$*~?wEeVeNn)emnD49VKdd~VB* zkID!9!_;v*o&9cWkNHnbML#TktD+-ouk^WXIX>DQkh8x(LK!^A#|!5sYp?aW9XLLs z8}zSJPfAD+!wkn#dYB%S{*s7j-|CHdZ^h(F3cyX3ReQ6Wc-H6Ed2ns}LCxR+>La(9 zlSgMVLmt8y396`%^^1>@v0Q}1m&->>6MO&8yno6af6TmRVPWTB{!i-te_I3nJ@p>k z2g%jQ1yY1dQsgli|1m8efXpOh=xqAb)(70&>Hn61FDxmd?CA_ne3!H{vj_M7AqQ96 zGPeX*|MDcK5d^nkGJQkNB=2Nm>SSqWPEI3f0#2;Abnyfum0cYiY)ow*6YZJF!CCQ7 z&4d2C1Av_6PcI~}0u_64NfB8?aId1j_>q~^AA28ZvvIPM12{Rz*;v`hS^q!~826at z|A+&D-v9vX6MV$~g?ogL??>Lh_?|wqf@Oi}fMEPzV^%Qkar^{XIU%~hxWCT>!1%w@ zJ%0WR_Xr<#J>s75V0(aI+<)f5<0o4lW&CXo9IT)x`oHY_r<}j!{bk#uJuGZ2|FQM2 z`A7c8H9d}Xk^kN$3h>xH3Q`-4{I4Ac92}gFXX8)#HYV`Vm$b8VA%ASnWvp!KLT+Yh zXX5myJr%hT_*B~=gA2KtSc3aIK}Ju1jcpAbz`G;L2hDd1{MT9`H;;Z zPcFsqmm4sEdHzfB=&|6MagQ7<5Mv&jL^(fpzH&0O19#oxVFthW$a%!Tuct0y|Ck3r zxQxvHm;4Xc6L^xr0+zwd%qql3uKfr(Awy9JV1od$CjbIKMj{XZVFa;0jR23PfZ0J1 z0E7Sz2oQz<_9p;>XaGRwKoCk0gbx6jV~32`Ad1)^ir69A+5eD0EMtRE0wGe^AwU>H z$p*2S4MNEd(Z>$4lMNz;4YEu&2p=27KsNB90563dvM{zk!XRs4|3mU;4k8oqXMKWC zd;o}AAcPM9(E$2mE`$X1=U_pkutF$75Ya%$ia?M>10aV0_=gXo4+vogLP&sr_#ktl zkHP@Vf=?y^L?D&{ASCP%DeQmvAliX{PBCQB0LTakK0Hrd%>MEoaOK0NtKdJrYwYUu z*z@h_QhD;|M^Cl1GyTuiuH)OilPIZDM|)z0$qLLq+&7fS!#!1V9>jZbA8 zk(s0|O`OS*wUHsk7Fi&Bn82ch?Onmq9(+^zE0};==ZT1cmt*Pyjxdt8;G&O0f1vQ+ zyJ@2Xf@cYY_4xX9IzNAaXPE|Kh+FKfcKA{@?%{Z0ukY{s+g*%)$n)3i%Hl z00iEP^-mnQtKR?O030B|KXouOgE;?%V`m3|&(=TWu|DpE`v(rh{(s>G-@5)m$Ij0B zFB}I8^FQUWuySzxYaQ&Ij~mbaS1-6};=g%0ng3}E3y6gse24fSUI05Y+dp{$?4WcV7{ literal 0 HcmV?d00001 diff --git a/integration-tests/src/test/resources-binary/samples/test_semantic/README.txt b/integration-tests/src/test/resources-binary/samples/test_semantic/README.txt new file mode 100644 index 000000000..df7f0c685 --- /dev/null +++ b/integration-tests/src/test/resources-binary/samples/test_semantic/README.txt @@ -0,0 +1,2 @@ +This dataset is coming from https://www.kaggle.com/datasets/snehaanbhawal/resume-dataset +With a CC0: Public Domain license diff --git 
a/settings/src/main/java/fr/pilato/elasticsearch/crawler/fs/settings/Elasticsearch.java b/settings/src/main/java/fr/pilato/elasticsearch/crawler/fs/settings/Elasticsearch.java index cb5b0259d..8855e4387 100644 --- a/settings/src/main/java/fr/pilato/elasticsearch/crawler/fs/settings/Elasticsearch.java +++ b/settings/src/main/java/fr/pilato/elasticsearch/crawler/fs/settings/Elasticsearch.java @@ -62,6 +62,7 @@ public class Elasticsearch { private boolean sslVerification = true; private boolean pushTemplates = true; private String caCertificate; + private boolean semanticSearch = true; public Elasticsearch() { @@ -72,7 +73,7 @@ private Elasticsearch(List nodes, String index, String indexFolder, i String username, String password, String pipeline, String pathPrefix, boolean sslVerification, String caCertificate, - boolean pushTemplates) { + boolean pushTemplates, boolean semanticSearch) { this.nodes = nodes; this.index = index; this.indexFolder = indexFolder; @@ -87,6 +88,7 @@ private Elasticsearch(List nodes, String index, String indexFolder, i this.sslVerification = sslVerification; this.caCertificate = caCertificate; this.pushTemplates = pushTemplates; + this.semanticSearch = semanticSearch; } public static Builder builder() { @@ -211,6 +213,14 @@ public void setCaCertificate(String caCertificate) { this.caCertificate = caCertificate; } + public boolean isSemanticSearch() { + return semanticSearch; + } + + public void setSemanticSearch(boolean semanticSearch) { + this.semanticSearch = semanticSearch; + } + @SuppressWarnings("UnusedReturnValue") public static class Builder { private List nodes = Collections.singletonList(NODE_DEFAULT); @@ -227,6 +237,7 @@ public static class Builder { private String caCertificate; private boolean pushTemplates = true; private String apiKey = null; + private boolean semanticSearch = true; public Builder setNodes(List nodes) { this.nodes = nodes; @@ -335,12 +346,17 @@ public Builder setPushTemplates(boolean pushTemplates) { return 
this; } + public Builder setSemanticSearch(boolean semanticSearch) { + this.semanticSearch = semanticSearch; + return this; + } + public Elasticsearch build() { return new Elasticsearch(nodes, index, indexFolder, bulkSize, flushInterval, byteSize, apiKey, username, password, pipeline, pathPrefix, sslVerification, caCertificate, - pushTemplates); + pushTemplates, semanticSearch); } }