elastic
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
Lines changed: 9 additions & 1 deletion
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
Lines changed: 4 additions & 0 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
Lines changed: 13 additions & 3 deletions
diff --git a/Makefile b/Makefile
Lines changed: 14 additions & 10 deletions
diff --git a/bin/mocks/elasticsearch.py b/bin/mocks/elasticsearch.py
Lines changed: 9 additions & 9 deletions
diff --git a/bin/nbtest b/bin/nbtest
Lines changed: 1 addition & 1 deletion
diff --git a/example-apps/chatbot-rag-app/api/chat.py b/example-apps/chatbot-rag-app/api/chat.py
Lines changed: 20 additions & 12 deletions
diff --git a/example-apps/chatbot-rag-app/api/llm_integrations.py b/example-apps/chatbot-rag-app/api/llm_integrations.py
Lines changed: 35 additions & 13 deletions
diff --git a/example-apps/chatbot-rag-app/data/index_data.py b/example-apps/chatbot-rag-app/data/index_data.py
Lines changed: 9 additions & 7 deletions
@@ -30,14 +30,22 @@ jobs:
         ports:
           - 9200:9200
     steps:
+      - name: Remove irrelevant software  # to free up required disk space
+        run: |
+          df -h
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /usr/share/dotnet
+          df -h
       - name: Checkout
         uses: actions/checkout@v4
       - name: Setup python
         uses: actions/setup-python@v5
         with:
           python-version: '3.10'
       - name: Setup nbtest
-        run: make nbtest
+        run: make install-nbtest
       - name: Warm up
         continue-on-error: true
         run: sleep 30 && PATCH_ES=1 ELASTIC_CLOUD_ID=foo ELASTIC_API_KEY=bar bin/nbtest notebooks/search/00-quick-start.ipynb
 
@@ -13,3 +13,7 @@ repos:
     # generic [...]_PASSWORD=[...] pattern
     - --additional-pattern
     - '_PASSWORD=[0-9a-zA-Z_-]{10}'
+- repo: https://github.com/ambv/black
+  rev: 24.1.1  # Use latest tag on GitHub
+  hooks:
+  - id: black-jupyter
@@ -5,24 +5,34 @@ If you would like to contribute new example apps to the `elasticsearch-labs` rep
 ## Before you start
 
 Prior to opening a pull request, please:
-- Create an issue to [discuss the scope of your proposal](https://github.com/elastic/elasticsearch-labs/issues). We are happy to provide guidance to make for a pleasant contribution experience.
-- Sign the [Contributor License Agreement](https://www.elastic.co/contributor-agreement/). We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction. We ask this of all contributors in order to assure our users of the origin and continuing existence of the code. You only need to sign the CLA once.
+1. Create an issue to [discuss the scope of your proposal](https://github.com/elastic/elasticsearch-labs/issues). We are happy to provide guidance to make for a pleasant contribution experience.
+2. Sign the [Contributor License Agreement](https://www.elastic.co/contributor-agreement/). We are not asking you to assign copyright to us, but to give us the right to distribute your code without restriction. We ask this of all contributors in order to assure our users of the origin and continuing existence of the code. You only need to sign the CLA once.
+3. Install the pre-commit hook, as described in the next section.
 
 ### Pre-commit hook
 
 This repository has a pre-commit hook that ensures that your contributed code follows our guidelines. It is strongly recommended that you install the pre-commit hook on your locally cloned repository, as that will allow you to check the correctness of your submission without having to wait for our continuous integration build. To install the pre-commit hook, clone this repository and then run the following command from its top-level directory:
 
 ```bash
-make pre-commit
+make install
 ```
 
 If you do not have access to the `make` utility, you can also install the pre-commit hook with Python:
 
 ```bash
 python -m venv .venv
+.venv/bin/pip install -qqq -r requirements-dev.txt
 .venv/bin/pre-commit install
 ```
 
+You may get an error when you try to commit, for example if your code or notebook was not formatted with the [black formatter](https://github.com/psf/black). In this case, run the following command from the root of the repository:
+
+```bash
+make pre-commit
+```
+
+Then add the files changed by this command to your commit and try again; the commit should now succeed.
+
 ## General instruction
 
 - If the notebook or code sample requires signing up for an Elastic Cloud instance, make sure to add the appropriate `utm_source` and `utm_content` parameters to the cloud registration URL. For example, the Elastic Cloud sign-up URL for the Python notebooks should have `utm_source=github&utm_content=elasticsearch-labs-notebook` and code examples should have `utm_source=github&utm_content=elasticsearch-labs-samples`.
 
@@ -1,20 +1,24 @@
 # this is the list of notebooks that are integrated with the testing framework
 NOTEBOOKS = $(shell bin/find-notebooks-to-test.sh)
+VENV = .venv
 
-.PHONY: install pre-commit nbtest test notebooks
+.PHONY: install install-pre-commit install-nbtest test notebooks
 
-test: nbtest notebooks
+test: install-nbtest notebooks
 
 notebooks:
 	bin/nbtest $(NOTEBOOKS)
 
-install: pre-commit nbtest
+pre-commit: install-pre-commit
+	$(VENV)/bin/pre-commit run --all-files
 
-pre-commit:
-	python -m venv .venv
-	.venv/bin/pip install -qqq -r requirements-dev.txt
-	.venv/bin/pre-commit install
+install: install-pre-commit install-nbtest
 
-nbtest:
-	python3 -m venv .venv
-	.venv/bin/pip install -qqq elastic-nbtest
+install-pre-commit:
+	python -m venv $(VENV)
+	$(VENV)/bin/pip install -qqq -r requirements-dev.txt
+	$(VENV)/bin/pre-commit install
+
+install-nbtest:
+	python3 -m venv $(VENV)
+	$(VENV)/bin/pip install -qqq elastic-nbtest
@@ -8,30 +8,30 @@ def patch_elasticsearch():
 
     # remove the path entry that refers to this directory
     for path in sys.path:
-        if not path.startswith('/'):
+        if not path.startswith("/"):
             path = os.path.join(os.getcwd(), path)
-        if __file__ == os.path.join(path, 'elasticsearch.py'):
+        if __file__ == os.path.join(path, "elasticsearch.py"):
             sys.path.remove(path)
             break
 
     # remove this module, and import the real one instead
-    del sys.modules['elasticsearch']
+    del sys.modules["elasticsearch"]
     import elasticsearch
 
     # restore the import path
     sys.path = saved_path
 
-    # preserve the original Elasticsearch.__init__ method   
+    # preserve the original Elasticsearch.__init__ method
     orig_es_init = elasticsearch.Elasticsearch.__init__
 
     # patched version of Elasticsearch.__init__ that connects to self-hosted
     # regardless of connection arguments given
     def patched_es_init(self, *args, **kwargs):
-        if 'cloud_id' in kwargs:
-            assert kwargs['cloud_id'] == 'foo'
-        if 'api_key' in kwargs:
-            assert kwargs['api_key'] == 'bar'
-        return orig_es_init(self, 'http://localhost:9200')
+        if "cloud_id" in kwargs:
+            assert kwargs["cloud_id"] == "foo"
+        if "api_key" in kwargs:
+            assert kwargs["api_key"] == "bar"
+        return orig_es_init(self, "http://localhost:9200", timeout=60)
 
     # patch Elasticsearch.__init__
     elasticsearch.Elasticsearch.__init__ = patched_es_init
 
@@ -2,7 +2,7 @@
 SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 
 if [[ ! -f $SCRIPT_DIR/../.venv/bin/nbtest ]]; then
-    make nbtest
+    make install-nbtest
 fi
 
 if [[ "$PATCH_ES" != "" ]]; then
 
@@ -36,31 +36,39 @@ def ask_question(question, session_id):
     if len(chat_history.messages) > 0:
         # create a condensed question
         condense_question_prompt = render_template(
-            'condense_question_prompt.txt', question=question,
-            chat_history=chat_history.messages)
+            "condense_question_prompt.txt",
+            question=question,
+            chat_history=chat_history.messages,
+        )
         condensed_question = get_llm().invoke(condense_question_prompt).content
     else:
         condensed_question = question
 
-    current_app.logger.debug('Condensed question: %s', condensed_question)
-    current_app.logger.debug('Question: %s', question)
+    current_app.logger.debug("Condensed question: %s", condensed_question)
+    current_app.logger.debug("Question: %s", question)
 
     docs = store.as_retriever().invoke(condensed_question)
     for doc in docs:
-        doc_source = {**doc.metadata, 'page_content': doc.page_content}
-        current_app.logger.debug('Retrieved document passage from: %s', doc.metadata['name'])
-        yield f'data: {SOURCE_TAG} {json.dumps(doc_source)}\n\n'
+        doc_source = {**doc.metadata, "page_content": doc.page_content}
+        current_app.logger.debug(
+            "Retrieved document passage from: %s", doc.metadata["name"]
+        )
+        yield f"data: {SOURCE_TAG} {json.dumps(doc_source)}\n\n"
 
-    qa_prompt = render_template('rag_prompt.txt', question=question, docs=docs,
-                                chat_history=chat_history.messages)
+    qa_prompt = render_template(
+        "rag_prompt.txt",
+        question=question,
+        docs=docs,
+        chat_history=chat_history.messages,
+    )
 
-    answer = ''
+    answer = ""
     for chunk in get_llm().stream(qa_prompt):
-        yield f'data: {chunk.content}\n\n'
+        yield f"data: {chunk.content}\n\n"
         answer += chunk.content
 
     yield f"data: {DONE_TAG}\n\n"
-    current_app.logger.debug('Answer: %s', answer)
+    current_app.logger.debug("Answer: %s", answer)
 
     chat_history.add_user_message(question)
     chat_history.add_ai_message(answer)
@@ -5,37 +5,54 @@
 
 LLM_TYPE = os.getenv("LLM_TYPE", "openai")
 
+
 def init_openai_chat(temperature):
     OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-    return ChatOpenAI(openai_api_key=OPENAI_API_KEY, streaming=True, temperature=temperature)
+    return ChatOpenAI(
+        openai_api_key=OPENAI_API_KEY, streaming=True, temperature=temperature
+    )
+
+
 def init_vertex_chat(temperature):
     VERTEX_PROJECT_ID = os.getenv("VERTEX_PROJECT_ID")
     VERTEX_REGION = os.getenv("VERTEX_REGION", "us-central1")
     vertexai.init(project=VERTEX_PROJECT_ID, location=VERTEX_REGION)
     return ChatVertexAI(streaming=True, temperature=temperature)
+
+
 def init_azure_chat(temperature):
-    OPENAI_VERSION=os.getenv("OPENAI_VERSION", "2023-05-15")
-    BASE_URL=os.getenv("OPENAI_BASE_URL")
-    OPENAI_API_KEY=os.getenv("OPENAI_API_KEY")
-    OPENAI_ENGINE=os.getenv("OPENAI_ENGINE")
+    OPENAI_VERSION = os.getenv("OPENAI_VERSION", "2023-05-15")
+    BASE_URL = os.getenv("OPENAI_BASE_URL")
+    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+    OPENAI_ENGINE = os.getenv("OPENAI_ENGINE")
     return AzureChatOpenAI(
         deployment_name=OPENAI_ENGINE,
         openai_api_base=BASE_URL,
         openai_api_version=OPENAI_VERSION,
         openai_api_key=OPENAI_API_KEY,
         streaming=True,
-        temperature=temperature)
+        temperature=temperature,
+    )
+
+
 def init_bedrock(temperature):
-    AWS_ACCESS_KEY=os.getenv("AWS_ACCESS_KEY")
-    AWS_SECRET_KEY=os.getenv("AWS_SECRET_KEY")
-    AWS_REGION=os.getenv("AWS_REGION")
-    AWS_MODEL_ID=os.getenv("AWS_MODEL_ID", "anthropic.claude-v2")
-    BEDROCK_CLIENT=boto3.client(service_name="bedrock-runtime", region_name=AWS_REGION, aws_access_key_id=AWS_ACCESS_KEY, aws_secret_access_key=AWS_SECRET_KEY)
+    AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY")
+    AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")
+    AWS_REGION = os.getenv("AWS_REGION")
+    AWS_MODEL_ID = os.getenv("AWS_MODEL_ID", "anthropic.claude-v2")
+    BEDROCK_CLIENT = boto3.client(
+        service_name="bedrock-runtime",
+        region_name=AWS_REGION,
+        aws_access_key_id=AWS_ACCESS_KEY,
+        aws_secret_access_key=AWS_SECRET_KEY,
+    )
     return BedrockChat(
         client=BEDROCK_CLIENT,
         model_id=AWS_MODEL_ID,
         streaming=True,
-        model_kwargs={"temperature":temperature})
+        model_kwargs={"temperature": temperature},
+    )
+
 
 MAP_LLM_TYPE_TO_CHAT_MODEL = {
     "azure": init_azure_chat,
@@ -44,8 +61,13 @@ def init_bedrock(temperature):
     "vertex": init_vertex_chat,
 }
 
+
 def get_llm(temperature=0):
     if not LLM_TYPE in MAP_LLM_TYPE_TO_CHAT_MODEL:
-        raise Exception("LLM type not found. Please set LLM_TYPE to one of: " + ", ".join(MAP_LLM_TYPE_TO_CHAT_MODEL.keys()) + ".")
+        raise Exception(
+            "LLM type not found. Please set LLM_TYPE to one of: "
+            + ", ".join(MAP_LLM_TYPE_TO_CHAT_MODEL.keys())
+            + "."
+        )
 
     return MAP_LLM_TYPE_TO_CHAT_MODEL[LLM_TYPE](temperature=temperature)
@@ -61,14 +61,16 @@ def main():
 
     print(f"Loading data from ${FILE}")
 
-    metadata_keys = ['name', 'summary', 'url', 'category', 'updated_at']
+    metadata_keys = ["name", "summary", "url", "category", "updated_at"]
     workplace_docs = []
-    with open(FILE, 'rt') as f:
+    with open(FILE, "rt") as f:
         for doc in json.loads(f.read()):
-            workplace_docs.append(Document(
-                page_content=doc['content'],
-                metadata={k: doc.get(k) for k in metadata_keys}
-            ))
+            workplace_docs.append(
+                Document(
+                    page_content=doc["content"],
+                    metadata={k: doc.get(k) for k in metadata_keys},
+                )
+            )
 
     print(f"Loaded {len(workplace_docs)} documents")
 
@@ -92,7 +94,7 @@ def main():
         index_name=INDEX,
         strategy=ElasticsearchStore.SparseVectorRetrievalStrategy(model_id=ELSER_MODEL),
         bulk_kwargs={
-            'request_timeout': 60,
+            "request_timeout": 60,
         },
     )