From bce63896138f3196a824e4f261fd8abfb7120703 Mon Sep 17 00:00:00 2001 From: dnicodemus-la Date: Fri, 27 Oct 2023 11:35:58 -0400 Subject: [PATCH 01/88] make the openai key variable truly optional and don't set it in the dist or docker env files --- .env.dist | 1 - .env.docker | 1 - swirl_server/settings.py | 3 +-- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.env.dist b/.env.dist index c8372dcf0..acf6c79ee 100644 --- a/.env.dist +++ b/.env.dist @@ -11,4 +11,3 @@ SQL_PORT=5432 MICROSOFT_CLIENT_ID='' MICROSOFT_CLIENT_SECRET='' MICROSOFT_REDIRECT_URI='' -OPENAI_API_KEY= diff --git a/.env.docker b/.env.docker index 2e8c9cb13..341154cd0 100644 --- a/.env.docker +++ b/.env.docker @@ -13,4 +13,3 @@ MICROSOFT_CLIENT_SECRET='' MICROSOFT_REDIRECT_URI='http://localhost:8000/swirl/microsoft-callback' CELERY_BROKER_URL='redis://redis:6379/0' CELERY_RESULT_BACKEND='redis://redis:6379/0' -OPENAI_API_KEY='' diff --git a/swirl_server/settings.py b/swirl_server/settings.py index 2b85b7561..a097bf824 100644 --- a/swirl_server/settings.py +++ b/swirl_server/settings.py @@ -264,8 +264,7 @@ SWIRL_DEFAULT_RESULT_BLOCK = 'ai_summary' -if 'OPENAI_API_KEY' in env: - OPENAI_API_KEY = env('OPENAI_API_KEY') +OPENAI_API_KEY = env.get_value('OPENAI_API_KEY', default='') MICROSOFT_CLIENT_ID= env('MICROSOFT_CLIENT_ID') MICROSOFT_CLIENT_SECRET = env('MICROSOFT_CLIENT_SECRET') From d86ce4b300759a211ad0b53f151cd8a0b5c7df43 Mon Sep 17 00:00:00 2001 From: dnicodemus-la Date: Fri, 27 Oct 2023 12:28:29 -0400 Subject: [PATCH 02/88] add the open api to env --- docker-compose.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 04997c5a8..4b522e291 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -15,4 +15,5 @@ services: -e "s//$MSAL_HOST/" > static/api/config/default && python swirl.py start celery-worker celery-beats && daphne -b 0.0.0.0 -p 8000 swirl_server.asgi:application' - + environment: # Environment variables section + - OPENAI_API_KEY=${OPENAI_API_KEY} From dbd135cb613a95356be827f1dbccacc29e9221e6 Mon Sep 17 00:00:00 2001 From: Harsh Mahajan <127186841+HarshMN2345@users.noreply.github.com> Date: Sat, 28 Oct 2023 19:55:28 +0530 Subject: [PATCH 03/88] Create vespa.json --- SearchProviders/vespa.json | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 SearchProviders/vespa.json diff --git a/SearchProviders/vespa.json b/SearchProviders/vespa.json new file mode 100644 index 000000000..6b7b1ad1d --- /dev/null +++ b/SearchProviders/vespa.json @@ -0,0 +1,20 @@ +{ + "name": "Vespa", + "active": true, + "default": true, + "connector": "VespaRestSearch", + "url": "https://localhost:8080/search/", + "query_template": "{url}?query={query_string}&fields={fields}&hits={hits}", + "query_processors": ["AdaptiveQueryProcessor"], + "query_mappings": "", + "result_processors": [ + "MappingResultProcessor", + "LenLimitingResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "RESULTS=root_object.hits.hits, FOUND=root_object.hits.found, RETRIEVED=root_object.hits.retrieved", + "result_mappings": "title=fields.title, body=fields.id|fields.resource_type|fields.resource_subtype, author=fields.author, date_published=fields.date_published, url=fields.url", + "results_per_query": 10, + "credentials": "", + "tags": ["Vespa", "Search"] +} From 47cc3bf584ec10fc6cb83e42f8ed3807814626a8 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Sun, 29 Oct 2023 11:52:41 -0400 Subject: [PATCH 04/88] cherry-pick readme 
late Readme update from main

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 02cda4b95..c41ed3679 100644
--- a/README.md
+++ b/README.md
@@ -133,7 +133,7 @@ The most recent Search object will be displayed at the top. Click on the `result
 > **Warning**
 > The Docker version of Swirl *does not* retain any data or configuration when shut down!
 
-:key: Swirl includes four (4) Google Programmable Search Engines (PSEs) to get you up and running right away. The credentials for these are shared with the Swirl Community.
+:key: Swirl includes five (5) Google Programmable Search Engines (PSEs) to get you up and running right away. The credentials for these are shared with the Swirl Community.
 
 :key: Using Swirl with Microsoft 365 requires installation and approval by an authorized company Administrator. For more information, please review the [M365 Guide](https://docs.swirl.today/M365-Guide.html) or [contact us](mailto:hello@swirl.today).
 

From ad7680f6c5ac4157837f9551cfd46e1fb7c905a3 Mon Sep 17 00:00:00 2001
From: dnicodemus-la
Date: Mon, 30 Oct 2023 11:23:26 -0400
Subject: [PATCH 05/88] cherry pick from the main pre-release

---
 README.md | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/README.md b/README.md
index c41ed3679..014e2d7b8 100644
--- a/README.md
+++ b/README.md
@@ -98,6 +98,13 @@ Swirl adapts and distributes user queries to anything with a search API - search
 curl https://raw.githubusercontent.com/swirlai/swirl-search/main/docker-compose.yaml -o docker-compose.yaml
 ```
 
+* *Optional*: To enable Swirl's Real-Time Retrieval Augmented Generation (RAG) in Docker, run the following commands from the Console using a valid OpenAI API key:
+``` shell
+export MSAL_CB_PORT=8000
+export MSAL_HOST=localhost
+export OPENAI_API_KEY=''
+```
+
 * In MacOS or Linux, run the following command from the Console:
 
 ```

From f21564a78e42fb485a7f58455108ee563ce03b35 Mon Sep 17 00:00:00 2001
From: dnicodemus-la
Date: Mon, 30 Oct 2023 11:24:21 -0400
Subject: [PATCH 06/88] cherry pick from prerelease main

---
 README.md | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 014e2d7b8..89eec1339 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@
-### Swirl is open-source software that simultaneously searches multiple content sources and returns AI ranked results.
+### Swirl is open source software that simultaneously searches multiple content sources and returns AI ranked results.
@@ -201,7 +201,7 @@ For information about Swirl as a managed service, please [contact us](mailto:hel # πŸ“– Documentation -[Overview](https://docs.swirl.today/) | [Quick Start](https://docs.swirl.today/Quick-Start) | [User Guide](https://docs.swirl.today/User-Guide) | [Admin Guide](https://docs.swirl.today/Admin-Guide) | [M365 Guide](https://docs.swirl.today/M365-Guide) | [Developer Guide](https://docs.swirl.today/Developer-Guide) | [Developer Reference](https://docs.swirl.today/Developer-Reference) +[Overview](https://docs.swirl.today/) | [Quick Start](https://docs.swirl.today/Quick-Start) | [User Guide](https://docs.swirl.today/User-Guide) | [Admin Guide](https://docs.swirl.today/Admin-Guide) | [M365 Guide](https://docs.swirl.today/M365-Guide) | [Developer Guide](https://docs.swirl.today/Developer-Guide) | [Developer Reference](https://docs.swirl.today/Developer-Reference) | [AI Guide](https://docs.swirl.today/AI-Guide)
@@ -216,6 +216,3 @@ At Swirl, every user matters to us. Whether you're a beginner finding your way o * πŸ’Ό **Request A Connector (Enterprise Support)** Want to see a new connector quickly and fast. Contact the Swirl Team at: [support@swirl.today](mailto:support@swirl.today) Remember, you're part of our family now. πŸŒπŸ’™ - - -
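PATCH 01/88 makes OPENAI_API_KEY truly optional by reading it through django-environ's `get_value()` with an empty-string default, and PATCH 05/88 above documents exporting the key before starting the Docker stack. The snippet below is a minimal sketch of that optional-key pattern, assuming django-environ is installed; the standalone module and the `rag_is_enabled()` helper are illustrative, not Swirl's actual settings code:

```python
# Sketch of the optional-environment-key pattern from PATCH 01/88.
# Assumes django-environ; illustrative module, not swirl_server/settings.py.
import environ

env = environ.Env()

# get_value() with a default returns '' instead of raising
# ImproperlyConfigured when the variable is unset, which is what
# replaced the old `if 'OPENAI_API_KEY' in env:` guard.
OPENAI_API_KEY = env.get_value('OPENAI_API_KEY', default='')

def rag_is_enabled() -> bool:
    # Hypothetical helper: downstream code can branch on the empty
    # default rather than on the variable's presence.
    return bool(OPENAI_API_KEY)
```

Combined with the docker-compose change in PATCH 02/88, the container inherits `OPENAI_API_KEY` from the host shell, so leaving it unset yields an empty key rather than a startup failure.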
From f581caa7dd12bdba0cdd9d325144b6060da8877a Mon Sep 17 00:00:00 2001
From: dnicodemus-la
Date: Mon, 30 Oct 2023 11:35:34 -0400
Subject: [PATCH 07/88] cherry-picked from prerelease main

---
 swirl/processors/rag.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/swirl/processors/rag.py b/swirl/processors/rag.py
index 8718c8e5a..9be5eab61 100644
--- a/swirl/processors/rag.py
+++ b/swirl/processors/rag.py
@@ -29,6 +29,8 @@
 MODEL_TOK_MAX = MODEL_3_TOK_MAX
 MODEL_DEFAULT_SYSTEM_GUIDE = "You are a helpful assistant who considers recent information when answering questions."
 FETCH_TO_SECS=10
+DO_MESSAGE_MOCK_ON_ERROR=True
+MESSAGE_MOCK_ON_ERROR=f"Mock API response from {MODEL}. This is a mock response for testing purposes only."
 
 from celery.utils.log import get_task_logger
 logger = get_task_logger(__name__)
@@ -225,10 +227,15 @@ def background_process(self):
             for (k,v) in fetch_prompt_errors.items():
                 logger.info(f'RAG:\t url:{k} problem:{v}')
         except Exception as err:
-            logger.error(f"error : {err} while creating CGPT response")
-            result = Result.objects.create(owner=self.search.owner, search_id=self.search, provider_id=5, searchprovider='ChatGPT', query_string_to_provider=new_prompt_text[:256], query_to_provider='None', status='READY', retrieved=1, found=1, json_results=[], time=0.0)
-            result.save()
-            return 0
+            if DO_MESSAGE_MOCK_ON_ERROR:
+                logger.error(f"error : {err} while creating CGPT response")
+                logger.info(f'Returning mock message instead : {MESSAGE_MOCK_ON_ERROR}')
+                model_response = MESSAGE_MOCK_ON_ERROR
+            else:
+                logger.error(f"error : {err} while creating CGPT response")
+                result = Result.objects.create(owner=self.search.owner, search_id=self.search, provider_id=5, searchprovider='ChatGPT', query_string_to_provider=new_prompt_text[:256], query_to_provider='None', status='READY', retrieved=1, found=1, json_results=[], time=0.0)
+                result.save()
+                return 0
 
         logger.info(f'RAGTITLE: {self.search.query_string_processed}')
         logger.info(f'RAGBODY: {model_response}')

From 90a0b57ccc773a48e46fe418ed67f92370122efb Mon Sep 17 00:00:00 2001
From: dnicodemus-la
Date: Mon, 30 Oct 2023 11:36:39 -0400
Subject: [PATCH 08/88] cherry picked from prerelease main

---
 swirl/processors/rag.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/swirl/processors/rag.py b/swirl/processors/rag.py
index 9be5eab61..a92ec5baf 100644
--- a/swirl/processors/rag.py
+++ b/swirl/processors/rag.py
@@ -29,7 +29,7 @@
 MODEL_TOK_MAX = MODEL_3_TOK_MAX
 MODEL_DEFAULT_SYSTEM_GUIDE = "You are a helpful assistant who considers recent information when answering questions."
 FETCH_TO_SECS=10
-DO_MESSAGE_MOCK_ON_ERROR=True
+DO_MESSAGE_MOCK_ON_ERROR=False
 MESSAGE_MOCK_ON_ERROR=f"Mock API response from {MODEL}. This is a mock response for testing purposes only."
 
 from celery.utils.log import get_task_logger
 logger = get_task_logger(__name__)

From a6b600ae4b621a2d0a962b51937932a3fe64fc69 Mon Sep 17 00:00:00 2001
From: Erik Spears
Date: Mon, 30 Oct 2023 13:17:51 -0400
Subject: [PATCH 09/88] fix typo in install-ui.sh

---
 install-ui.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/install-ui.sh b/install-ui.sh
index 4f6207d48..5b2f6a650 100755
--- a/install-ui.sh
+++ b/install-ui.sh
@@ -3,7 +3,7 @@
 # Usage:
 #   install-ui.sh []
 #
-# Install the UI into Swirl static directory. The Swril install and setup must be run
+# Install the UI into Swirl static directory. The Swirl install and setup must be run
 # before this command can be used.
# # Options: From a22c0cf35a4c0275e7b3b74058221b0f301dc610 Mon Sep 17 00:00:00 2001 From: Harshil Khamar <73790584+Harshil0512@users.noreply.github.com> Date: Tue, 31 Oct 2023 13:29:17 +0530 Subject: [PATCH 10/88] Duck Duck Go Search Provider Added --- SearchProviders/duck_duck_go.json | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 SearchProviders/duck_duck_go.json diff --git a/SearchProviders/duck_duck_go.json b/SearchProviders/duck_duck_go.json new file mode 100644 index 000000000..0b3a8dfe3 --- /dev/null +++ b/SearchProviders/duck_duck_go.json @@ -0,0 +1,19 @@ +{ + "name": "DuckDuckGo", + "active": false, + "default": false, + "connector": "RequestsGet", + "url": "https://api.duckduckgo.com/", + "query_template": "{url}?q={query_string}&format=json&pretty=1", + "query_processors": ["AdaptiveQueryProcessor"], + "query_mappings": "", + "result_processors": [ + "MappingResultProcessor", + "LenLimitingResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "RESULTS=results", + "result_mappings": "title=Heading, body=RelatedTopics", + "results_per_query": 10, + "tags": ["DuckDuckGo", "Search"] +} From f3358f915f500d759aff369769f8bf5d4a01c221 Mon Sep 17 00:00:00 2001 From: Harshil Khamar Date: Tue, 31 Oct 2023 14:02:08 +0530 Subject: [PATCH 11/88] Yelp Search Added --- SearchProviders/yelp.json | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 SearchProviders/yelp.json diff --git a/SearchProviders/yelp.json b/SearchProviders/yelp.json new file mode 100644 index 000000000..9a9a88760 --- /dev/null +++ b/SearchProviders/yelp.json @@ -0,0 +1,20 @@ +{ + "name": "Yelp", + "active": false, + "default": false, + "connector": "RequestsGet", + "url": "https://api.yelp.com/v3/businesses/search", + "query_template": "{url}?sort_by=best_match&limit=20&location={query_string}&format=json&pretty=1", + "query_processors": ["AdaptiveQueryProcessor"], + "query_mappings": "", + "result_processors": [ + "MappingResultProcessor", + "LenLimitingResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "RESULTS=results", + "result_mappings": "regin=regin, body=businesses", + "results_per_query": 10, + "credentials": "bearer=", + "tags": ["Yelp", "Search"] + } \ No newline at end of file From 7499859835e2af42b6c3c2786c1a8e2da6da3985 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Wed, 1 Nov 2023 11:55:36 -0400 Subject: [PATCH 12/88] move contributed SP to untested/ dir --- SearchProviders/{ => untested}/vespa.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename SearchProviders/{ => untested}/vespa.json (100%) diff --git a/SearchProviders/vespa.json b/SearchProviders/untested/vespa.json similarity index 100% rename from SearchProviders/vespa.json rename to SearchProviders/untested/vespa.json From be18a95153d6865c607221873d77b1c57abf04aa Mon Sep 17 00:00:00 2001 From: Harsh Mahajan <127186841+HarshMN2345@users.noreply.github.com> Date: Wed, 1 Nov 2023 21:57:16 +0530 Subject: [PATCH 13/88] Update trello.json --- SearchProviders/untested/trello.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SearchProviders/untested/trello.json b/SearchProviders/untested/trello.json index 2edb676f2..4edcb0f53 100644 --- a/SearchProviders/untested/trello.json +++ b/SearchProviders/untested/trello.json @@ -13,7 +13,7 @@ "CosineRelevancyResultProcessor" ], "response_mappings": "RESULTS=cards", - "result_mappings": "title=name,body=desc", + "result_mappings": 
"title=name,body=desc,url=url,author=username,date_published=dateLastActivity", "credentials": "bearer=", "tags": ["Trello"] } From 2d5b05d8c28f336f01c66ac5ed2625894d3508ce Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Thu, 2 Nov 2023 15:44:57 +0100 Subject: [PATCH 14/88] fix default qrx_type value --- swirl/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swirl/models.py b/swirl/models.py index 837f3cf0f..971ae554d 100644 --- a/swirl/models.py +++ b/swirl/models.py @@ -245,7 +245,7 @@ class QueryTransform(models.Model) : ('synonym', 'Synonym' ), ('bag', 'Synonym Bag' ) ] - qrx_type = models.CharField(max_length=64, default='', choices=QUERY_TRASNSFORM_TYPE_CHOICES) + qrx_type = models.CharField(max_length=64, default='rewrite', choices=QUERY_TRASNSFORM_TYPE_CHOICES) config_content = models.TextField() class Meta: unique_together = [ From c642464f073ee06534399917924a440b877359dc Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Thu, 2 Nov 2023 15:49:29 +0100 Subject: [PATCH 15/88] openapi swagger reimplementation --- requirements.txt | 3 ++- swirl/middleware.py | 23 ++++++++++++++++++++--- swirl/templates/swagger-ui.html | 28 ---------------------------- swirl/urls.py | 28 ++++++++++++++++------------ swirl/views.py | 4 +++- swirl_server/settings.py | 5 ++++- 6 files changed, 45 insertions(+), 46 deletions(-) delete mode 100644 swirl/templates/swagger-ui.html diff --git a/requirements.txt b/requirements.txt index 42f46ff3a..c7034a2b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,4 +29,5 @@ readability-lxml tiktoken channels channels-redis -tika \ No newline at end of file +tika +drf-yasg \ No newline at end of file diff --git a/swirl/middleware.py b/swirl/middleware.py index 146caa154..9540b4a38 100644 --- a/swirl/middleware.py +++ b/swirl/middleware.py @@ -1,12 +1,13 @@ from rest_framework.authtoken.models import Token -from django.http import HttpResponseForbidden +from django.http import HttpResponseForbidden, HttpResponse from swirl.models import Search from swirl.authenticators import * from channels.middleware import BaseMiddleware from channels.db import database_sync_to_async from urllib.parse import parse_qs from django.core.exceptions import ObjectDoesNotExist - +import json +import yaml import jwt import logging as logger @@ -106,4 +107,20 @@ def get_search_by_id_and_user(self, search_id, user): try: return Search.objects.filter(pk=search_id, owner=user).exists() except ObjectDoesNotExist: - return None \ No newline at end of file + return None + +class SwaggerMiddleware: + def __init__(self, get_response): + self.get_response = get_response + + def __call__(self, request): + format = request.GET.get('format') + if '/swirl/swagger' in request.path and format and format == 'openapi': + response = self.get_response(request) + if response.status_code == 200: + openapi_data = json.loads(response.content) + yaml_content = yaml.dump(openapi_data, default_flow_style=False) + response = HttpResponse(yaml_content, content_type='text/yaml') + return response + return self.get_response(request) + return self.get_response(request) \ No newline at end of file diff --git a/swirl/templates/swagger-ui.html b/swirl/templates/swagger-ui.html deleted file mode 100644 index f726bab99..000000000 --- a/swirl/templates/swagger-ui.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - Swirl Swagger - - - - - -
- - - - diff --git a/swirl/urls.py b/swirl/urls.py index bad2e94f2..7dfd8a976 100644 --- a/swirl/urls.py +++ b/swirl/urls.py @@ -6,11 +6,11 @@ # from webbrowser import get from django.urls import include, path -from django.views.generic import TemplateView -from rest_framework.schemas import get_schema_view -from rest_framework import routers +from rest_framework import routers, permissions from . import views from swirl.authenticators import Microsoft +from drf_yasg.views import get_schema_view +from drf_yasg import openapi router = routers.DefaultRouter() router.register(r'users', views.UserViewSet) @@ -25,16 +25,20 @@ router.register(r'sapi/authenticators', views.AuthenticatorViewSet, basename='galaxy-authenticators') router.register(r'sapi/searchproviders', views.SearchProviderViewSet, basename='galaxy-searchproviders'), +schema_view = get_schema_view( + openapi.Info( + title="Swirl Swagger", + default_version="v1", + description="Swirl API descriptions", + ), + public=True, + permission_classes=(permissions.AllowAny,), +) + + urlpatterns = [ - path('openapi', get_schema_view( - title="Swirl Swagger", - description="Swirl API descriptions", - version="1.1.0" - ), name='openapi-schema'), - path('swagger-ui/', TemplateView.as_view( - template_name='swagger-ui.html', - extra_context={'schema_url':'openapi-schema'} - ), name='swagger-ui'), + path('swagger/', schema_view.with_ui(cache_timeout=0), + name='schema-swagger-ui'), path('query_transform_form/', views.query_transform_form, name='query_transform_form'), # this appears to be necessary to access the view from a pytest API unit test diff --git a/swirl/views.py b/swirl/views.py index 7c476fe79..a4aed2aee 100644 --- a/swirl/views.py +++ b/swirl/views.py @@ -170,6 +170,9 @@ class Meta: class AuthenticatorViewSet(viewsets.ModelViewSet): serializer_class = AuthenticatorSerializer + def get_queryset(self): + return AuthenticatorModel.objects.all() + def list(self, request): return return_authenticators_list(request) @@ -567,7 +570,6 @@ def list(self, request): return Response(paginate(serializer.data, self.request), status=status.HTTP_200_OK) ######################################## - def create(self, request): # check permissions diff --git a/swirl_server/settings.py b/swirl_server/settings.py index a097bf824..3977cc514 100644 --- a/swirl_server/settings.py +++ b/swirl_server/settings.py @@ -13,6 +13,7 @@ from pathlib import Path import environ import os +from drf_yasg.inspectors import CamelCaseJSONFilter, ReferencingSerializerInspector # Build paths inside the project like this: BASE_DIR / 'subdir'. BASE_DIR = Path(__file__).resolve().parent.parent @@ -50,7 +51,8 @@ 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', - 'django.contrib.staticfiles' + 'django.contrib.staticfiles', + 'drf_yasg' ] ASGI_APPLICATION = 'swirl_server.routing.application' @@ -69,6 +71,7 @@ 'django.contrib.auth.middleware.AuthenticationMiddleware', 'swirl.middleware.TokenMiddleware', 'swirl.middleware.SpyglassAuthenticatorsMiddleware', + 'swirl.middleware.SwaggerMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', ] From 4ca8c6a93a6e1d2b80370e1e4f0fcc5399293fbb Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Thu, 2 Nov 2023 11:21:51 -0400 Subject: [PATCH 16/88] update development workflows with branch persistences, etc. 
--- .../docker-image-spg-experimental.yml | 14 ++++- .../workflows/docker-image-spg-preview.yml | 12 +++- .github/workflows/docker-image-spg.yml | 55 +++++++++++++++++- .github/workflows/docker-image.yml | 47 +++++++++++---- .github/workflows/integration-api-tests.yml | 54 +++++++++++++++--- .github/workflows/smoke-tests.yml | 57 +++++++++++++++---- .github/workflows/unit-tests.yml | 33 +++++++++-- 7 files changed, 233 insertions(+), 39 deletions(-) diff --git a/.github/workflows/docker-image-spg-experimental.yml b/.github/workflows/docker-image-spg-experimental.yml index f2c65e5d6..2f007598c 100644 --- a/.github/workflows/docker-image-spg-experimental.yml +++ b/.github/workflows/docker-image-spg-experimental.yml @@ -1,8 +1,7 @@ name: EXPERIMENTAL Latest Spyglass Docker Build on: - schedule: - - cron: '3 * * * *' + # Allows you to run this workflow manually from the Actions tab workflow_dispatch: # For debugging @@ -17,10 +16,19 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: Checkout the code + uses: actions/checkout@v4 - name: Build the Docker image run: docker build --no-cache -t swirlai/spyglass:fork-x -f Dockerfile.fork.spg . - name: login to docker hub run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - name: Push the Docker image run: docker push swirlai/spyglass:fork-x + - name: Upload log files + if: always() + uses: actions/upload-artifact@v3 + with: + name: log-files + path: | + logs/ + /var/log/syslog* diff --git a/.github/workflows/docker-image-spg-preview.yml b/.github/workflows/docker-image-spg-preview.yml index 7e5383fcf..0a3601fe3 100644 --- a/.github/workflows/docker-image-spg-preview.yml +++ b/.github/workflows/docker-image-spg-preview.yml @@ -1,6 +1,7 @@ name: PREVIEW Latest Spyglass Docker Build on: + # Allows you to run this workflow manually from the Actions tab workflow_dispatch: # For debugging @@ -15,10 +16,19 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: Checkout the code + uses: actions/checkout@v4 - name: Build the Docker image run: docker build --no-cache -t swirlai/spyglass:preview -f Dockerfile.develop.spg . 
- name: login to docker hub run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - name: Push the Docker image run: docker push swirlai/spyglass:preview + - name: Upload log files + if: always() + uses: actions/upload-artifact@v3 + with: + name: log-files + path: | + logs/ + /var/log/syslog* diff --git a/.github/workflows/docker-image-spg.yml b/.github/workflows/docker-image-spg.yml index ce34537e9..6b4971f74 100644 --- a/.github/workflows/docker-image-spg.yml +++ b/.github/workflows/docker-image-spg.yml @@ -1,6 +1,11 @@ -name: Latest Spyglass Docker Build +name: LatestSpyglassDockerBuild on: + workflow_run: + workflows: [IntegrationAPITests] + types: + - completed + # Allow manual run of this workflow from the Actions tab workflow_dispatch: # For debugging @@ -11,14 +16,58 @@ on: jobs: build: - + if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: Download branch and run_id artifacts + uses: dawidd6/action-download-artifact@v2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + workflow: integration-api-tests.yml + name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} + path: ./artifacts + continue-on-error: true # Allow the step to fail without stopping the workflow + - name: Determine branch for checkout + id: determine_branch + run: | + if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then + echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV + echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV + else + BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) + echo "branch=$BRANCH_NAME" >> $GITHUB_ENV + fi + - name: Print branch to be checked out + run: | + echo "Branch to checkout: ${{ env.branch }}" + - name: Checkout the code + uses: actions/checkout@v4 + with: + ref: ${{ env.branch }} - name: Build the Docker image run: docker build -t swirlai/spyglass:latest -f Dockerfile.spg . 
- name: login to docker hub run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - name: Push the Docker image run: docker push swirlai/spyglass + - name: Ensure artifacts directory exists and write branch and run_id again + run: | + mkdir -p ./artifacts + echo "${{ env.branch }}" > ./artifacts/branch.txt + echo "${{ env.original_run_id }}" > ./artifacts/run_id.txt + - name: Re-upload branch and run_id for subsequent workflows + uses: actions/upload-artifact@v3 + with: + name: branch-info-${{ github.run_id }} + path: | + ./artifacts/branch.txt + ./artifacts/run_id.txt + - name: Upload log files + if: always() + uses: actions/upload-artifact@v3 + with: + name: log-files + path: | + logs/ + /var/log/syslog* \ No newline at end of file diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 61c333f44..ba3f9f83d 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -1,23 +1,50 @@ -name: Docker Build - -# Build a multi-arch docker image for Swirl -# comment to start build +name: DockerBuild on: + workflow_run: + workflows: [LatestSpyglassDockerBuild] + types: + - completed + # Allow manual run of this workflow from the Actions tab workflow_dispatch: - push: - # only trigger on branches, not on tags - branches: 'develop' +# For debugging +# on: +# push: +# branches: '' jobs: build: - + if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: Download branch and run_id artifacts + uses: dawidd6/action-download-artifact@v2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + workflow: docker-image-spg.yml + name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} + path: ./artifacts + continue-on-error: true # Allow the step to fail without stopping the workflow + - name: Determine branch for checkout + id: determine_branch + run: | + if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then + echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV + echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV + else + BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) + echo "branch=$BRANCH_NAME" >> $GITHUB_ENV + fi + - name: Print branch to be checked out + run: | + echo "Branch to checkout: ${{ env.branch }}" + - name: Checkout the code + uses: actions/checkout@v4 + with: + ref: ${{ env.branch }} - name: login to docker hub run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - name: builder bootstrap @@ -32,7 +59,7 @@ jobs: repository: swirlai/swirl-search - name: Upload log files if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: log-files path: | diff --git a/.github/workflows/integration-api-tests.yml b/.github/workflows/integration-api-tests.yml index cb424481a..a5f9154bd 100644 --- a/.github/workflows/integration-api-tests.yml +++ b/.github/workflows/integration-api-tests.yml @@ -1,17 +1,45 @@ -name: Ingegration API Tests +name: IntegrationAPITests on: - schedule: - - cron: '05 05 * * *' - push: - branches: 'develop' + workflow_run: + workflows: [SmokeTests] + types: + - completed + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: jobs: + build: + if: (github.event_name == 'workflow_dispatch') || 
(github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: Download branch and run_id artifacts + uses: dawidd6/action-download-artifact@v2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + workflow: smoke-tests.yml + name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} + path: ./artifacts + continue-on-error: true # Allow the step to fail without stopping the workflow + - name: Determine branch for checkout + id: determine_branch + run: | + if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then + echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV + echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV + else + BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) + echo "branch=$BRANCH_NAME" >> $GITHUB_ENV + fi + - name: Print branch to be checked out + run: | + echo "Branch to checkout: ${{ env.branch }}" + - name: Checkout the code + uses: actions/checkout@v4 + with: + ref: ${{ env.branch }} - name: Set up Python uses: actions/setup-python@v4 with: @@ -35,9 +63,21 @@ jobs: run: python swirl.py start - name: Run Integrated API tests run: docker run --net=host -t swirlai/swirl-testing:latest-integrated-api sh -c "behave --tags=integrated_api" + - name: Ensure artifacts directory exists and write branch and run_id again + run: | + mkdir -p ./artifacts + echo "${{ env.branch }}" > ./artifacts/branch.txt + echo "${{ env.original_run_id }}" > ./artifacts/run_id.txt + - name: Re-upload branch and run_id for subsequent workflows + uses: actions/upload-artifact@v3 + with: + name: branch-info-${{ github.run_id }} + path: | + ./artifacts/branch.txt + ./artifacts/run_id.txt - name: Upload log files if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: log-files path: | diff --git a/.github/workflows/smoke-tests.yml b/.github/workflows/smoke-tests.yml index 75c9727ad..86fad148c 100644 --- a/.github/workflows/smoke-tests.yml +++ b/.github/workflows/smoke-tests.yml @@ -1,20 +1,45 @@ -name: Smoke Test +name: SmokeTests - -on: # build at 05:05 UTC each day - schedule: - - cron: '05 05 * * *' - push: - branches: 'develop' +on: + workflow_run: + workflows: [UnitTests] + types: + - completed + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: jobs: build: - + if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: Download branch and run_id artifacts + uses: dawidd6/action-download-artifact@v2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + workflow: unit-tests.yml + name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} + path: ./artifacts + continue-on-error: true # Allow the step to fail without stopping the workflow + - name: Determine branch for checkout + id: determine_branch + run: | + if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then + echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV + echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV + else + BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) + echo "branch=$BRANCH_NAME" >> $GITHUB_ENV + fi + - name: Print branch to be checked out + run: | + echo "Branch to checkout: ${{ env.branch }}" + - name: Checkout the code + uses: 
actions/checkout@v4 + with: + ref: ${{ env.branch }} - name: Set up Python uses: actions/setup-python@v4 with: @@ -34,9 +59,21 @@ jobs: run: python swirl.py start - name: Run Smoke tests run: docker run --net=host -t swirlai/swirl-testing:latest-smoke-test sh -c "behave **/docker_container/*.feature --tags=docker_api_smoke" + - name: Ensure artifacts directory exists and write branch and run_id again + run: | + mkdir -p ./artifacts + echo "${{ env.branch }}" > ./artifacts/branch.txt + echo "${{ env.original_run_id }}" > ./artifacts/run_id.txt + - name: Re-upload branch and run_id for subsequent workflows + uses: actions/upload-artifact@v3 + with: + name: branch-info-${{ github.run_id }} + path: | + ./artifacts/branch.txt + ./artifacts/run_id.txt - name: Upload log files if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: log-files path: | diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 92bd4de78..a1063cad6 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -1,18 +1,23 @@ -name: Unit Tests +name: UnitTests on: push: - # only trigger on branches, not on tags + paths-ignore: + - '.github/**' + - 'README.md' + # Only trigger on branches, not on tags branches: 'develop' + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: jobs: build: - runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - name: Checkout the code + uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v4 with: @@ -24,9 +29,27 @@ jobs: run: ./install-test.sh - name: Run pytest unit tests run: pytest + - name: Create artifacts directory + run: mkdir -p artifacts + - name: Set branch name + id: extract_branch + run: | + BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) + echo "branch=$BRANCH_NAME" >> $GITHUB_ENV + - name: Write branch and run_id to file + run: | + echo "${{ env.branch }}" > ./artifacts/branch.txt + echo "${{ github.run_id }}" > ./artifacts/run_id.txt + - name: Upload branch and run_id files as artifact + uses: actions/upload-artifact@v3 + with: + name: branch-info-${{ github.run_id }} + path: | + ./artifacts/branch.txt + ./artifacts/run_id.txt - name: Upload log files if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: log-files path: | From 3edba5eeb6bd8cbcf2b176d5b12df80d26744cec Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Thu, 2 Nov 2023 15:24:01 -0400 Subject: [PATCH 17/88] cherry-pick Readme updates from main --- README.md | 40 +++++++++++----------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index 89eec1339..3f4e31a2c 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,12 @@ -[![Swirl](https://docs.swirl.today/images/hack_swirl_fest.png)](https://join.slack.com/t/swirlmetasearch/shared_invite/zt-1qk7q02eo-kpqFAbiZJGOdqgYVvR1sfw) -
[![Swirl](https://docs.swirl.today/images/transparent_header_3.png)](https://www.swirl.today) -
- -

Swirl

- -
+

Swirl

### Swirl is open source software that simultaneously searches multiple content sources and returns AI ranked results. -
- - -[πš‚πšπšŠπš›πš πš‚πšŽπšŠπš›πšŒπš‘πš’πš—πš](#-try-swirl-now-in-docker) ⦁ -[πš‚πš•πšŠπšŒπš”](https://join.slack.com/t/swirlmetasearch/shared_invite/zt-1qk7q02eo-kpqFAbiZJGOdqgYVvR1sfw) ⦁ -[π™ΊπšŽπš’ π™΅πšŽπšŠπšπšžπš›πšŽπšœ ](#-key-features) ⦁ -[π™²πš˜πš—πšπš›πš’πš‹πšžπšπšŽ](#-contributing-to-swirl) ⦁ -[π™³πš˜πšŒπšžπš–πšŽπš—πšπšŠπšπš’πš˜πš—](#-documentation) ⦁ [π™²πš˜πš—πš—πšŽπšŒπšπš˜πš›πšœ](#-list-of-connectors) - - +[Start Searching](#-try-swirl-now-in-docker) Β· [Slack](https://join.slack.com/t/swirlmetasearch/shared_invite/zt-1qk7q02eo-kpqFAbiZJGOdqgYVvR1sfw) Β· [Key Features ](#-key-features) Β· [Contribute](#-contributing-to-swirl) Β· [Documentation](#-documentation) Β· [Connectors](#-list-of-connectors)
--- @@ -30,11 +15,10 @@ [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-blue.svg?color=088395&logoColor=blue&style=flat-square)](https://opensource.org/license/apache-2-0/) [![GitHub Release](https://img.shields.io/github/v/release/swirlai/swirl-search?style=flat-square&color=8DDFCB&label=Release)](https://github.com/swirlai/swirl-search/releases) -[![Slack](https://custom-icon-badges.demolab.com/badge/Join%20Our%20Slack-black?style=flat-square&logo=slack&color=0E21A0&logoColor=white)](https://join.slack.com/t/swirlmetasearch/shared_invite/zt-1qk7q02eo-kpqFAbiZJGOdqgYVvR1sfw) -[![PRs Welcome](https://custom-icon-badges.demolab.com/badge/PRs%20Welcome-black?style=flat-square&logo=github&color=4D2DB7&logoColor=C70039)](#contributing-to-swirl) -[![Website](https://custom-icon-badges.demolab.com/badge/www.swirl.today-black?style=flat-square&logo=globe&color=241468&logoColor=white)](https://www.swirl.today) [![Docker Build](https://github.com/swirlai/swirl-search/actions/workflows/docker-image.yml/badge.svg?style=flat-square&branch=main)](https://github.com/swirlai/swirl-search/actions/workflows/docker-image.yml) [![Tests](https://github.com/swirlai/swirl-search/actions/workflows/smoke-tests.yml/badge.svg?branch=main)](https://github.com/swirlai/swirl-search/actions/workflows/smoke-tests.yml) +[![Slack](https://custom-icon-badges.demolab.com/badge/Join%20Our%20Slack-black?style=flat-square&logo=slack&color=0E21A0&logoColor=white)](https://join.slack.com/t/swirlmetasearch/shared_invite/zt-1qk7q02eo-kpqFAbiZJGOdqgYVvR1sfw) +[![Website](https://custom-icon-badges.demolab.com/badge/www.swirl.today-black?style=flat-square&logo=globe&color=241468&logoColor=white)](https://www.swirl.today)
@@ -42,9 +26,9 @@ Swirl is open source software that simultaneously searches multiple content sour Swirl can connect to: -* Databases (SQL & NoSQL, Google BigQuery) -* Public data services (Google Programmable Search, Arxiv.org, etc.) -* Enterprise sources (Microsoft 365, Jira, Miro etc.) +* Databases (SQL, NoSQL, Google BigQuery) +* Public data services (Google Programmable Search Engines, ArXiv.org, etc.) +* Enterprise sources (Microsoft 365, Jira, Miro, etc.) And generate insights with AI and LLMs like ChatGPT. Start discovering and generating the answers you need based on your data. @@ -63,7 +47,7 @@ _Swirl with ChatGPT as a configured AI Model._ # πŸ”Ž How Swirl Works -Swirl adapts and distributes user queries to anything with a search API - search engines, databases, noSQL engines, cloud/SaaS services, data siloes, etc. And uses Large Language Models to re-rank the unified results *without* extracting or indexing *anything*. +Swirl adapts and distributes user queries to anything with a search API - search engines, databases, noSQL engines, cloud/SaaS services, data siloes, etc. and uses Large Language Models to re-rank the unified results *without* extracting or indexing *anything*. ![Swirl Diagram](https://docs.swirl.today/images/Animation_2.gif) @@ -92,7 +76,7 @@ Swirl adapts and distributes user queries to anything with a search API - search > **Warning** > Make sure the Docker app is running before proceeding! -* Download [https://raw.githubusercontent.com/swirlai/swirl-search/main/docker-compose.yaml](https://raw.githubusercontent.com/swirlai/swirl-search/main/docker-compose.yaml) +* Download the YML file: [https://raw.githubusercontent.com/swirlai/swirl-search/main/docker-compose.yaml](https://raw.githubusercontent.com/swirlai/swirl-search/main/docker-compose.yaml) ``` curl https://raw.githubusercontent.com/swirlai/swirl-search/main/docker-compose.yaml -o docker-compose.yaml @@ -174,23 +158,21 @@ The most recent Search object will be displayed at the top. Click on the `result | βŒ› | [Optional search/result expiration service](https://docs.swirl.today/Admin-Guide.html#search-expiration-service) to limit storage use | | πŸ”Œ | Easily extensible [Connector](https://github.com/swirlai/swirl-search/tree/main/swirl/connectors) and [Mixer](https://github.com/swirlai/swirl-search/tree/main/swirl/mixers) objects | -
# πŸ‘©β€πŸ’» Contributing to Swirl -**Do you have a brilliant idea or improvement for Swirl?** We're all earsβ€”and thrilled you're here to help! +**Do you have a brilliant idea or improvement for Swirl?** We're all ears, and thrilled you're here to help! πŸ”— **Get Started in 3 Easy Steps**: 1. **Connect with Fellow Enthusiasts** - Jump into the [Swirl Slack Community](https://join.slack.com/t/swirlmetasearch/shared_invite/zt-1qk7q02eo-kpqFAbiZJGOdqgYVvR1sfw) and share your ideas. You'll find a welcoming group of Swirl enthusiasts and team members eager to assist and collaborate. -2. **Branch It Out** - Always branch off from the `develop` branch with a descriptive name that encapsulates your idea or fix. Remember, all PRs should be made to the `develop` branch to ensure `main` remains our stable gold-standard. +2. **Branch It Out** - Always branch off from the `develop` branch with a descriptive name that encapsulates your idea or fix. 3. **Start Your Contribution** - Ready to get your hands dirty? Make sure all contributions come through a GitHub pull request. We roughly follow the [Gitflow branching model](https://nvie.com/posts/a-successful-git-branching-model/), so all changes destined for the next release should be made to the `develop` branch. πŸ“š **First time contributing on GitHub?** No worries, the [GitHub documentation](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) has you covered with a great guide on contributing to projects. πŸ’‘ Every contribution, big or small, makes a difference. Join us in shaping the future of Swirl! -
# ☁ Use the Swirl Cloud From c4249036c973c706e26191aa16bd49d93025c816 Mon Sep 17 00:00:00 2001 From: Harshil Khamar Date: Fri, 3 Nov 2023 03:56:44 +0530 Subject: [PATCH 18/88] Changes done --- SearchProviders/{ => untested}/duck_duck_go.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename SearchProviders/{ => untested}/duck_duck_go.json (76%) diff --git a/SearchProviders/duck_duck_go.json b/SearchProviders/untested/duck_duck_go.json similarity index 76% rename from SearchProviders/duck_duck_go.json rename to SearchProviders/untested/duck_duck_go.json index 0b3a8dfe3..1fab358e3 100644 --- a/SearchProviders/duck_duck_go.json +++ b/SearchProviders/untested/duck_duck_go.json @@ -12,8 +12,8 @@ "LenLimitingResultProcessor", "CosineRelevancyResultProcessor" ], - "response_mappings": "RESULTS=results", - "result_mappings": "title=Heading, body=RelatedTopics", + "response_mappings": "RESULTS=RelatedTopics", + "result_mappings": "url=meta['developer']['url'],body=snippet,author=meta['developer']['name']", "results_per_query": 10, "tags": ["DuckDuckGo", "Search"] } From cb2cb251579a796e85436f3ca46669ba8f3e3cb8 Mon Sep 17 00:00:00 2001 From: Harshil Khamar Date: Fri, 3 Nov 2023 04:22:20 +0530 Subject: [PATCH 19/88] Changes Completed --- SearchProviders/{ => untested}/yelp.json | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) rename SearchProviders/{ => untested}/yelp.json (80%) diff --git a/SearchProviders/yelp.json b/SearchProviders/untested/yelp.json similarity index 80% rename from SearchProviders/yelp.json rename to SearchProviders/untested/yelp.json index 9a9a88760..1db708de1 100644 --- a/SearchProviders/yelp.json +++ b/SearchProviders/untested/yelp.json @@ -9,11 +9,10 @@ "query_mappings": "", "result_processors": [ "MappingResultProcessor", - "LenLimitingResultProcessor", "CosineRelevancyResultProcessor" ], - "response_mappings": "RESULTS=results", - "result_mappings": "regin=regin, body=businesses", + "response_mappings": "FOUND=total,RESULTS=results", + "result_mappings": "title=term,body=businesses", "results_per_query": 10, "credentials": "bearer=", "tags": ["Yelp", "Search"] From 9748ba03e558d6f624b630f09dd10aa7b0ca490f Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Fri, 3 Nov 2023 09:37:09 -0400 Subject: [PATCH 20/88] removing 2 workflows from develop --- .../docker-image-spg-experimental.yml | 34 ------------------- .../workflows/docker-image-spg-preview.yml | 34 ------------------- 2 files changed, 68 deletions(-) delete mode 100644 .github/workflows/docker-image-spg-experimental.yml delete mode 100644 .github/workflows/docker-image-spg-preview.yml diff --git a/.github/workflows/docker-image-spg-experimental.yml b/.github/workflows/docker-image-spg-experimental.yml deleted file mode 100644 index 2f007598c..000000000 --- a/.github/workflows/docker-image-spg-experimental.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: EXPERIMENTAL Latest Spyglass Docker Build - -on: - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# For debugging -# on: -# push: -# branches: '' - -jobs: - - build: - - runs-on: ubuntu-latest - - steps: - - name: Checkout the code - uses: actions/checkout@v4 - - name: Build the Docker image - run: docker build --no-cache -t swirlai/spyglass:fork-x -f Dockerfile.fork.spg . 
- - name: login to docker hub - run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - - name: Push the Docker image - run: docker push swirlai/spyglass:fork-x - - name: Upload log files - if: always() - uses: actions/upload-artifact@v3 - with: - name: log-files - path: | - logs/ - /var/log/syslog* diff --git a/.github/workflows/docker-image-spg-preview.yml b/.github/workflows/docker-image-spg-preview.yml deleted file mode 100644 index 0a3601fe3..000000000 --- a/.github/workflows/docker-image-spg-preview.yml +++ /dev/null @@ -1,34 +0,0 @@ -name: PREVIEW Latest Spyglass Docker Build - -on: - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -# For debugging -# on: -# push: -# branches: '' - -jobs: - - build: - - runs-on: ubuntu-latest - - steps: - - name: Checkout the code - uses: actions/checkout@v4 - - name: Build the Docker image - run: docker build --no-cache -t swirlai/spyglass:preview -f Dockerfile.develop.spg . - - name: login to docker hub - run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - - name: Push the Docker image - run: docker push swirlai/spyglass:preview - - name: Upload log files - if: always() - uses: actions/upload-artifact@v3 - with: - name: log-files - path: | - logs/ - /var/log/syslog* From f6563851b70db2475dbdf5f1a47e427cf4ac499e Mon Sep 17 00:00:00 2001 From: Erik Spears <98238295+erikspears@users.noreply.github.com> Date: Fri, 3 Nov 2023 10:40:30 -0400 Subject: [PATCH 21/88] Revert "Ds 1009 - Resolve errors in Swirl OpenAPI Specification" --- requirements.txt | 3 +-- swirl/middleware.py | 23 +++-------------------- swirl/models.py | 2 +- swirl/templates/swagger-ui.html | 28 ++++++++++++++++++++++++++++ swirl/urls.py | 28 ++++++++++++---------------- swirl/views.py | 4 +--- swirl_server/settings.py | 5 +---- 7 files changed, 47 insertions(+), 46 deletions(-) create mode 100644 swirl/templates/swagger-ui.html diff --git a/requirements.txt b/requirements.txt index c7034a2b4..42f46ff3a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,5 +29,4 @@ readability-lxml tiktoken channels channels-redis -tika -drf-yasg \ No newline at end of file +tika \ No newline at end of file diff --git a/swirl/middleware.py b/swirl/middleware.py index 9540b4a38..146caa154 100644 --- a/swirl/middleware.py +++ b/swirl/middleware.py @@ -1,13 +1,12 @@ from rest_framework.authtoken.models import Token -from django.http import HttpResponseForbidden, HttpResponse +from django.http import HttpResponseForbidden from swirl.models import Search from swirl.authenticators import * from channels.middleware import BaseMiddleware from channels.db import database_sync_to_async from urllib.parse import parse_qs from django.core.exceptions import ObjectDoesNotExist -import json -import yaml + import jwt import logging as logger @@ -107,20 +106,4 @@ def get_search_by_id_and_user(self, search_id, user): try: return Search.objects.filter(pk=search_id, owner=user).exists() except ObjectDoesNotExist: - return None - -class SwaggerMiddleware: - def __init__(self, get_response): - self.get_response = get_response - - def __call__(self, request): - format = request.GET.get('format') - if '/swirl/swagger' in request.path and format and format == 'openapi': - response = self.get_response(request) - if response.status_code == 200: - openapi_data = json.loads(response.content) - yaml_content = yaml.dump(openapi_data, default_flow_style=False) - response = 
HttpResponse(yaml_content, content_type='text/yaml') - return response - return self.get_response(request) - return self.get_response(request) \ No newline at end of file + return None \ No newline at end of file diff --git a/swirl/models.py b/swirl/models.py index 971ae554d..837f3cf0f 100644 --- a/swirl/models.py +++ b/swirl/models.py @@ -245,7 +245,7 @@ class QueryTransform(models.Model) : ('synonym', 'Synonym' ), ('bag', 'Synonym Bag' ) ] - qrx_type = models.CharField(max_length=64, default='rewrite', choices=QUERY_TRASNSFORM_TYPE_CHOICES) + qrx_type = models.CharField(max_length=64, default='', choices=QUERY_TRASNSFORM_TYPE_CHOICES) config_content = models.TextField() class Meta: unique_together = [ diff --git a/swirl/templates/swagger-ui.html b/swirl/templates/swagger-ui.html new file mode 100644 index 000000000..f726bab99 --- /dev/null +++ b/swirl/templates/swagger-ui.html @@ -0,0 +1,28 @@ + + + + Swirl Swagger + + + + + +
+ + + + diff --git a/swirl/urls.py b/swirl/urls.py index 7dfd8a976..bad2e94f2 100644 --- a/swirl/urls.py +++ b/swirl/urls.py @@ -6,11 +6,11 @@ # from webbrowser import get from django.urls import include, path -from rest_framework import routers, permissions +from django.views.generic import TemplateView +from rest_framework.schemas import get_schema_view +from rest_framework import routers from . import views from swirl.authenticators import Microsoft -from drf_yasg.views import get_schema_view -from drf_yasg import openapi router = routers.DefaultRouter() router.register(r'users', views.UserViewSet) @@ -25,20 +25,16 @@ router.register(r'sapi/authenticators', views.AuthenticatorViewSet, basename='galaxy-authenticators') router.register(r'sapi/searchproviders', views.SearchProviderViewSet, basename='galaxy-searchproviders'), -schema_view = get_schema_view( - openapi.Info( - title="Swirl Swagger", - default_version="v1", - description="Swirl API descriptions", - ), - public=True, - permission_classes=(permissions.AllowAny,), -) - - urlpatterns = [ - path('swagger/', schema_view.with_ui(cache_timeout=0), - name='schema-swagger-ui'), + path('openapi', get_schema_view( + title="Swirl Swagger", + description="Swirl API descriptions", + version="1.1.0" + ), name='openapi-schema'), + path('swagger-ui/', TemplateView.as_view( + template_name='swagger-ui.html', + extra_context={'schema_url':'openapi-schema'} + ), name='swagger-ui'), path('query_transform_form/', views.query_transform_form, name='query_transform_form'), # this appears to be necessary to access the view from a pytest API unit test diff --git a/swirl/views.py b/swirl/views.py index a4aed2aee..7c476fe79 100644 --- a/swirl/views.py +++ b/swirl/views.py @@ -170,9 +170,6 @@ class Meta: class AuthenticatorViewSet(viewsets.ModelViewSet): serializer_class = AuthenticatorSerializer - def get_queryset(self): - return AuthenticatorModel.objects.all() - def list(self, request): return return_authenticators_list(request) @@ -570,6 +567,7 @@ def list(self, request): return Response(paginate(serializer.data, self.request), status=status.HTTP_200_OK) ######################################## + def create(self, request): # check permissions diff --git a/swirl_server/settings.py b/swirl_server/settings.py index 3977cc514..a097bf824 100644 --- a/swirl_server/settings.py +++ b/swirl_server/settings.py @@ -13,7 +13,6 @@ from pathlib import Path import environ import os -from drf_yasg.inspectors import CamelCaseJSONFilter, ReferencingSerializerInspector # Build paths inside the project like this: BASE_DIR / 'subdir'. 
BASE_DIR = Path(__file__).resolve().parent.parent @@ -51,8 +50,7 @@ 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', - 'django.contrib.staticfiles', - 'drf_yasg' + 'django.contrib.staticfiles' ] ASGI_APPLICATION = 'swirl_server.routing.application' @@ -71,7 +69,6 @@ 'django.contrib.auth.middleware.AuthenticationMiddleware', 'swirl.middleware.TokenMiddleware', 'swirl.middleware.SpyglassAuthenticatorsMiddleware', - 'swirl.middleware.SwaggerMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', ] From 5ce7b191c3d9c7b02ee8fc5ce420df3b01031307 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Fri, 3 Nov 2023 14:33:25 -0400 Subject: [PATCH 22/88] remove all GH workflow yml from develop --- .github/workflows/docker-image-spg.yml | 73 ------------------ .github/workflows/docker-image.yml | 67 ---------------- .github/workflows/integration-api-tests.yml | 85 --------------------- .github/workflows/sectest-docker-image.yml | 24 ------ .github/workflows/smoke-tests.yml | 81 -------------------- .github/workflows/spell-checker.yml | 18 ----- .github/workflows/typos.toml | 8 -- .github/workflows/unit-tests.yml | 57 -------------- 8 files changed, 413 deletions(-) delete mode 100644 .github/workflows/docker-image-spg.yml delete mode 100644 .github/workflows/docker-image.yml delete mode 100644 .github/workflows/integration-api-tests.yml delete mode 100644 .github/workflows/sectest-docker-image.yml delete mode 100644 .github/workflows/smoke-tests.yml delete mode 100644 .github/workflows/spell-checker.yml delete mode 100644 .github/workflows/typos.toml delete mode 100644 .github/workflows/unit-tests.yml diff --git a/.github/workflows/docker-image-spg.yml b/.github/workflows/docker-image-spg.yml deleted file mode 100644 index 6b4971f74..000000000 --- a/.github/workflows/docker-image-spg.yml +++ /dev/null @@ -1,73 +0,0 @@ -name: LatestSpyglassDockerBuild - -on: - workflow_run: - workflows: [IntegrationAPITests] - types: - - completed - # Allow manual run of this workflow from the Actions tab - workflow_dispatch: - -# For debugging -# on: -# push: -# branches: '' - -jobs: - - build: - if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') - runs-on: ubuntu-latest - - steps: - - name: Download branch and run_id artifacts - uses: dawidd6/action-download-artifact@v2 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - workflow: integration-api-tests.yml - name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} - path: ./artifacts - continue-on-error: true # Allow the step to fail without stopping the workflow - - name: Determine branch for checkout - id: determine_branch - run: | - if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then - echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV - echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV - else - BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) - echo "branch=$BRANCH_NAME" >> $GITHUB_ENV - fi - - name: Print branch to be checked out - run: | - echo "Branch to checkout: ${{ env.branch }}" - - name: Checkout the code - uses: actions/checkout@v4 - with: - ref: ${{ env.branch }} - - name: Build the Docker image - run: docker build -t swirlai/spyglass:latest -f Dockerfile.spg . 
- - name: login to docker hub - run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - - name: Push the Docker image - run: docker push swirlai/spyglass - - name: Ensure artifacts directory exists and write branch and run_id again - run: | - mkdir -p ./artifacts - echo "${{ env.branch }}" > ./artifacts/branch.txt - echo "${{ env.original_run_id }}" > ./artifacts/run_id.txt - - name: Re-upload branch and run_id for subsequent workflows - uses: actions/upload-artifact@v3 - with: - name: branch-info-${{ github.run_id }} - path: | - ./artifacts/branch.txt - ./artifacts/run_id.txt - - name: Upload log files - if: always() - uses: actions/upload-artifact@v3 - with: - name: log-files - path: | - logs/ - /var/log/syslog* \ No newline at end of file diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml deleted file mode 100644 index ba3f9f83d..000000000 --- a/.github/workflows/docker-image.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: DockerBuild - -on: - workflow_run: - workflows: [LatestSpyglassDockerBuild] - types: - - completed - # Allow manual run of this workflow from the Actions tab - workflow_dispatch: - -# For debugging -# on: -# push: -# branches: '' - -jobs: - - build: - if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') - runs-on: ubuntu-latest - - steps: - - name: Download branch and run_id artifacts - uses: dawidd6/action-download-artifact@v2 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - workflow: docker-image-spg.yml - name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} - path: ./artifacts - continue-on-error: true # Allow the step to fail without stopping the workflow - - name: Determine branch for checkout - id: determine_branch - run: | - if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then - echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV - echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV - else - BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) - echo "branch=$BRANCH_NAME" >> $GITHUB_ENV - fi - - name: Print branch to be checked out - run: | - echo "Branch to checkout: ${{ env.branch }}" - - name: Checkout the code - uses: actions/checkout@v4 - with: - ref: ${{ env.branch }} - - name: login to docker hub - run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - - name: builder bootstrap - run: docker buildx create --name devBuilder --use --bootstrap - - name: Build the Docker image - run: docker buildx build -t swirlai/swirl-search:develop --platform linux/amd64,linux/arm64 --push . 
- - name: Update repo description - uses: peter-evans/dockerhub-description@v3.3.0 - with: - username: ${{ secrets.DOCKER_USERNAME_X }} - password: ${{ secrets.DOCKER_PASSWORD_X }} - repository: swirlai/swirl-search - - name: Upload log files - if: always() - uses: actions/upload-artifact@v3 - with: - name: log-files - path: | - logs/ - /var/log/syslog* diff --git a/.github/workflows/integration-api-tests.yml b/.github/workflows/integration-api-tests.yml deleted file mode 100644 index a5f9154bd..000000000 --- a/.github/workflows/integration-api-tests.yml +++ /dev/null @@ -1,85 +0,0 @@ -name: IntegrationAPITests - -on: - workflow_run: - workflows: [SmokeTests] - types: - - completed - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -jobs: - - build: - if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') - runs-on: ubuntu-latest - - steps: - - name: Download branch and run_id artifacts - uses: dawidd6/action-download-artifact@v2 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - workflow: smoke-tests.yml - name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} - path: ./artifacts - continue-on-error: true # Allow the step to fail without stopping the workflow - - name: Determine branch for checkout - id: determine_branch - run: | - if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then - echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV - echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV - else - BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) - echo "branch=$BRANCH_NAME" >> $GITHUB_ENV - fi - - name: Print branch to be checked out - run: | - echo "Branch to checkout: ${{ env.branch }}" - - name: Checkout the code - uses: actions/checkout@v4 - with: - ref: ${{ env.branch }} - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - cache: 'pip' # caching pip stuff - - name: Run Install Swirl - run: ./install.sh - - name: Update apt - run: sudo apt -o Acquire::Retries=3 update - - name: upgrade Ubuntu to latest patches - run: sudo apt upgrade -y - - name: stop update-notifier which restarts datetime - run: sudo systemctl stop update-notifier-download.timer - - name: disable update-notifier which restarts datetime - run: sudo systemctl disable update-notifier-download.timer - - name: Run Install redist-server - run: sudo apt install -y redis-server - - name: Set up Swirl - run: python swirl.py setup - - name: Start up Swirl - run: python swirl.py start - - name: Run Integrated API tests - run: docker run --net=host -t swirlai/swirl-testing:latest-integrated-api sh -c "behave --tags=integrated_api" - - name: Ensure artifacts directory exists and write branch and run_id again - run: | - mkdir -p ./artifacts - echo "${{ env.branch }}" > ./artifacts/branch.txt - echo "${{ env.original_run_id }}" > ./artifacts/run_id.txt - - name: Re-upload branch and run_id for subsequent workflows - uses: actions/upload-artifact@v3 - with: - name: branch-info-${{ github.run_id }} - path: | - ./artifacts/branch.txt - ./artifacts/run_id.txt - - name: Upload log files - if: always() - uses: actions/upload-artifact@v3 - with: - name: log-files - path: | - logs/ - /var/log/syslog* diff --git a/.github/workflows/sectest-docker-image.yml b/.github/workflows/sectest-docker-image.yml deleted file mode 100644 index 5777472bc..000000000 --- 
a/.github/workflows/sectest-docker-image.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Security Testing Build - -# Build a multi-arch docker image for testing security updates to Swirl - -on: - push: - # only trigger on branches to security-testing, not on tags - branches: 'security-testing' - - -jobs: - - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - name: login to docker hub - run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin - - name: builder bootstrap - run: docker buildx create --name devBuilder --use --bootstrap - - name: Build the Docker image - run: docker buildx build -t swirlai/swirl-search:develop-security-testing --platform linux/amd64,linux/arm64 --push . diff --git a/.github/workflows/smoke-tests.yml b/.github/workflows/smoke-tests.yml deleted file mode 100644 index 86fad148c..000000000 --- a/.github/workflows/smoke-tests.yml +++ /dev/null @@ -1,81 +0,0 @@ -name: SmokeTests - -on: - workflow_run: - workflows: [UnitTests] - types: - - completed - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -jobs: - - build: - if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') - runs-on: ubuntu-latest - - steps: - - name: Download branch and run_id artifacts - uses: dawidd6/action-download-artifact@v2 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - workflow: unit-tests.yml - name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} - path: ./artifacts - continue-on-error: true # Allow the step to fail without stopping the workflow - - name: Determine branch for checkout - id: determine_branch - run: | - if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then - echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV - echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV - else - BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) - echo "branch=$BRANCH_NAME" >> $GITHUB_ENV - fi - - name: Print branch to be checked out - run: | - echo "Branch to checkout: ${{ env.branch }}" - - name: Checkout the code - uses: actions/checkout@v4 - with: - ref: ${{ env.branch }} - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - cache: 'pip' # caching pip stuff - - name: Run Install Swirl - run: ./install.sh - - name: Update apt - run: sudo apt -o Acquire::Retries=3 update - - name: upgrade Ubuntu to latest patches - run: sudo apt upgrade -y - - name: Run Install redis-server - run: sudo apt install -y redis-server - - name: Set up Swirl - run: python swirl.py setup - - name: Start up Swirl - run: python swirl.py start - - name: Run Smoke tests - run: docker run --net=host -t swirlai/swirl-testing:latest-smoke-test sh -c "behave **/docker_container/*.feature --tags=docker_api_smoke" - - name: Ensure artifacts directory exists and write branch and run_id again - run: | - mkdir -p ./artifacts - echo "${{ env.branch }}" > ./artifacts/branch.txt - echo "${{ env.original_run_id }}" > ./artifacts/run_id.txt - - name: Re-upload branch and run_id for subsequent workflows - uses: actions/upload-artifact@v3 - with: - name: branch-info-${{ github.run_id }} - path: | - ./artifacts/branch.txt - ./artifacts/run_id.txt - - name: Upload log files - if: always() - uses: actions/upload-artifact@v3 - with: - name: log-files - path: | - logs/ - /var/log/syslog* diff --git 
a/.github/workflows/spell-checker.yml b/.github/workflows/spell-checker.yml deleted file mode 100644 index 8390041bf..000000000 --- a/.github/workflows/spell-checker.yml +++ /dev/null @@ -1,18 +0,0 @@ -name: Check Spelling - -# Only allow manual run of this workflow from the Actions tab -on: - workflow_dispatch: - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Check Spelling - uses: crate-ci/typos@master # From here: https://github.com/crate-ci/typos - with: - config: ./.github/workflows/typos.toml - write_changes: true # Writes changes on the Action's local checkout \ No newline at end of file diff --git a/.github/workflows/typos.toml b/.github/workflows/typos.toml deleted file mode 100644 index 19821a141..000000000 --- a/.github/workflows/typos.toml +++ /dev/null @@ -1,8 +0,0 @@ -# Exclude the following from spell checking - -[files] -extend-exclude = ["swirl/tests/"] - -[default.extend-identifiers] -# Don't correct this variable from scripts/fix_csv.py -fo = "fo" \ No newline at end of file diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml deleted file mode 100644 index a1063cad6..000000000 --- a/.github/workflows/unit-tests.yml +++ /dev/null @@ -1,57 +0,0 @@ -name: UnitTests - -on: - push: - paths-ignore: - - '.github/**' - - 'README.md' - # Only trigger on branches, not on tags - branches: 'develop' - # Allows you to run this workflow manually from the Actions tab - workflow_dispatch: - -jobs: - - build: - runs-on: ubuntu-latest - - steps: - - name: Checkout the code - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.11' - cache: 'pip' # caching pip stuff - - name: Run Install Swirl - run: ./install.sh - - name: Run Install Tests Swirl - run: ./install-test.sh - - name: Run pytest unit tests - run: pytest - - name: Create artifacts directory - run: mkdir -p artifacts - - name: Set branch name - id: extract_branch - run: | - BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) - echo "branch=$BRANCH_NAME" >> $GITHUB_ENV - - name: Write branch and run_id to file - run: | - echo "${{ env.branch }}" > ./artifacts/branch.txt - echo "${{ github.run_id }}" > ./artifacts/run_id.txt - - name: Upload branch and run_id files as artifact - uses: actions/upload-artifact@v3 - with: - name: branch-info-${{ github.run_id }} - path: | - ./artifacts/branch.txt - ./artifacts/run_id.txt - - name: Upload log files - if: always() - uses: actions/upload-artifact@v3 - with: - name: log-files - path: | - logs/ - /var/log/syslog* From 63d5c97f06ae1b5c2833932c0c0b34155f69e6cf Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Fri, 3 Nov 2023 16:39:14 -0400 Subject: [PATCH 23/88] testing manual workflow run --- .github/workflows/manual-test.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .github/workflows/manual-test.yml diff --git a/.github/workflows/manual-test.yml b/.github/workflows/manual-test.yml new file mode 100644 index 000000000..2e44ebfd4 --- /dev/null +++ b/.github/workflows/manual-test.yml @@ -0,0 +1,10 @@ +name: Manual Dispatch Test + +on: + workflow_dispatch: + +jobs: + test-job: + runs-on: ubuntu-latest + steps: + - run: echo "The workflow_dispatch event is working!" 
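
Patches 22 through 28 rework one recurring pattern: each downstream workflow downloads the branch-info-<run_id> artifact published by the workflow that triggered it, reads branch.txt and run_id.txt from it, and falls back to parsing GITHUB_REF when the artifact is missing. A standalone Python sketch of that branch-resolution step follows; resolve_branch and the "develop" fallback default are illustrative assumptions, not code from this repository.

import os
from pathlib import Path

def resolve_branch(artifacts_dir: str = "./artifacts") -> tuple[str, str | None]:
    """Mirror the 'Determine branch for checkout' shell step in the workflows above."""
    branch_file = Path(artifacts_dir) / "branch.txt"
    run_id_file = Path(artifacts_dir) / "run_id.txt"
    if branch_file.is_file() and run_id_file.is_file():
        # Artifact from the upstream run exists: reuse its branch and run_id.
        return branch_file.read_text().strip(), run_id_file.read_text().strip()
    # Fallback: take the third "/"-separated field of GITHUB_REF, as in
    # `cut -d "/" -f 3`, so "refs/heads/develop" resolves to "develop".
    parts = os.environ.get("GITHUB_REF", "").split("/")
    return (parts[2] if len(parts) > 2 else "develop"), None

branch, original_run_id = resolve_branch()
print(f"Branch to checkout: {branch} (original run_id: {original_run_id})")
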
From ca9f4dc0d1996807b0fd66fe739ba6105bae8f0e Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Fri, 3 Nov 2023 17:29:57 -0400 Subject: [PATCH 24/88] removing test file from develop --- .github/workflows/manual-test.yml | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 .github/workflows/manual-test.yml diff --git a/.github/workflows/manual-test.yml b/.github/workflows/manual-test.yml deleted file mode 100644 index 2e44ebfd4..000000000 --- a/.github/workflows/manual-test.yml +++ /dev/null @@ -1,10 +0,0 @@ -name: Manual Dispatch Test - -on: - workflow_dispatch: - -jobs: - test-job: - runs-on: ubuntu-latest - steps: - - run: echo "The workflow_dispatch event is working!" From d89b60d18e0511cf3937bde36d1fe739cb73bee2 Mon Sep 17 00:00:00 2001 From: Erik Spears <98238295+erikspears@users.noreply.github.com> Date: Fri, 3 Nov 2023 17:56:26 -0400 Subject: [PATCH 25/88] Revert "remove all GH workflow yml from develop" --- .github/workflows/docker-image-spg.yml | 73 ++++++++++++++++++ .github/workflows/docker-image.yml | 67 ++++++++++++++++ .github/workflows/integration-api-tests.yml | 85 +++++++++++++++++++++ .github/workflows/sectest-docker-image.yml | 24 ++++++ .github/workflows/smoke-tests.yml | 81 ++++++++++++++++++++ .github/workflows/spell-checker.yml | 18 +++++ .github/workflows/typos.toml | 8 ++ .github/workflows/unit-tests.yml | 57 ++++++++++++++ 8 files changed, 413 insertions(+) create mode 100644 .github/workflows/docker-image-spg.yml create mode 100644 .github/workflows/docker-image.yml create mode 100644 .github/workflows/integration-api-tests.yml create mode 100644 .github/workflows/sectest-docker-image.yml create mode 100644 .github/workflows/smoke-tests.yml create mode 100644 .github/workflows/spell-checker.yml create mode 100644 .github/workflows/typos.toml create mode 100644 .github/workflows/unit-tests.yml diff --git a/.github/workflows/docker-image-spg.yml b/.github/workflows/docker-image-spg.yml new file mode 100644 index 000000000..6b4971f74 --- /dev/null +++ b/.github/workflows/docker-image-spg.yml @@ -0,0 +1,73 @@ +name: LatestSpyglassDockerBuild + +on: + workflow_run: + workflows: [IntegrationAPITests] + types: + - completed + # Allow manual run of this workflow from the Actions tab + workflow_dispatch: + +# For debugging +# on: +# push: +# branches: '' + +jobs: + + build: + if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') + runs-on: ubuntu-latest + + steps: + - name: Download branch and run_id artifacts + uses: dawidd6/action-download-artifact@v2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + workflow: integration-api-tests.yml + name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} + path: ./artifacts + continue-on-error: true # Allow the step to fail without stopping the workflow + - name: Determine branch for checkout + id: determine_branch + run: | + if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then + echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV + echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV + else + BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) + echo "branch=$BRANCH_NAME" >> $GITHUB_ENV + fi + - name: Print branch to be checked out + run: | + echo "Branch to checkout: ${{ env.branch }}" + - name: Checkout the code + uses: actions/checkout@v4 + with: + ref: ${{ env.branch }} + - name: Build the Docker image + run: docker build -t 
swirlai/spyglass:latest -f Dockerfile.spg . + - name: login to docker hub + run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin + - name: Push the Docker image + run: docker push swirlai/spyglass + - name: Ensure artifacts directory exists and write branch and run_id again + run: | + mkdir -p ./artifacts + echo "${{ env.branch }}" > ./artifacts/branch.txt + echo "${{ env.original_run_id }}" > ./artifacts/run_id.txt + - name: Re-upload branch and run_id for subsequent workflows + uses: actions/upload-artifact@v3 + with: + name: branch-info-${{ github.run_id }} + path: | + ./artifacts/branch.txt + ./artifacts/run_id.txt + - name: Upload log files + if: always() + uses: actions/upload-artifact@v3 + with: + name: log-files + path: | + logs/ + /var/log/syslog* \ No newline at end of file diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml new file mode 100644 index 000000000..ba3f9f83d --- /dev/null +++ b/.github/workflows/docker-image.yml @@ -0,0 +1,67 @@ +name: DockerBuild + +on: + workflow_run: + workflows: [LatestSpyglassDockerBuild] + types: + - completed + # Allow manual run of this workflow from the Actions tab + workflow_dispatch: + +# For debugging +# on: +# push: +# branches: '' + +jobs: + + build: + if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') + runs-on: ubuntu-latest + + steps: + - name: Download branch and run_id artifacts + uses: dawidd6/action-download-artifact@v2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + workflow: docker-image-spg.yml + name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} + path: ./artifacts + continue-on-error: true # Allow the step to fail without stopping the workflow + - name: Determine branch for checkout + id: determine_branch + run: | + if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then + echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV + echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV + else + BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) + echo "branch=$BRANCH_NAME" >> $GITHUB_ENV + fi + - name: Print branch to be checked out + run: | + echo "Branch to checkout: ${{ env.branch }}" + - name: Checkout the code + uses: actions/checkout@v4 + with: + ref: ${{ env.branch }} + - name: login to docker hub + run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin + - name: builder bootstrap + run: docker buildx create --name devBuilder --use --bootstrap + - name: Build the Docker image + run: docker buildx build -t swirlai/swirl-search:develop --platform linux/amd64,linux/arm64 --push . 
+ - name: Update repo description + uses: peter-evans/dockerhub-description@v3.3.0 + with: + username: ${{ secrets.DOCKER_USERNAME_X }} + password: ${{ secrets.DOCKER_PASSWORD_X }} + repository: swirlai/swirl-search + - name: Upload log files + if: always() + uses: actions/upload-artifact@v3 + with: + name: log-files + path: | + logs/ + /var/log/syslog* diff --git a/.github/workflows/integration-api-tests.yml b/.github/workflows/integration-api-tests.yml new file mode 100644 index 000000000..a5f9154bd --- /dev/null +++ b/.github/workflows/integration-api-tests.yml @@ -0,0 +1,85 @@ +name: IntegrationAPITests + +on: + workflow_run: + workflows: [SmokeTests] + types: + - completed + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + + build: + if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') + runs-on: ubuntu-latest + + steps: + - name: Download branch and run_id artifacts + uses: dawidd6/action-download-artifact@v2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + workflow: smoke-tests.yml + name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} + path: ./artifacts + continue-on-error: true # Allow the step to fail without stopping the workflow + - name: Determine branch for checkout + id: determine_branch + run: | + if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then + echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV + echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV + else + BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) + echo "branch=$BRANCH_NAME" >> $GITHUB_ENV + fi + - name: Print branch to be checked out + run: | + echo "Branch to checkout: ${{ env.branch }}" + - name: Checkout the code + uses: actions/checkout@v4 + with: + ref: ${{ env.branch }} + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' # caching pip stuff + - name: Run Install Swirl + run: ./install.sh + - name: Update apt + run: sudo apt -o Acquire::Retries=3 update + - name: upgrade Ubuntu to latest patches + run: sudo apt upgrade -y + - name: stop update-notifier which restarts datetime + run: sudo systemctl stop update-notifier-download.timer + - name: disable update-notifier which restarts datetime + run: sudo systemctl disable update-notifier-download.timer + - name: Run Install redist-server + run: sudo apt install -y redis-server + - name: Set up Swirl + run: python swirl.py setup + - name: Start up Swirl + run: python swirl.py start + - name: Run Integrated API tests + run: docker run --net=host -t swirlai/swirl-testing:latest-integrated-api sh -c "behave --tags=integrated_api" + - name: Ensure artifacts directory exists and write branch and run_id again + run: | + mkdir -p ./artifacts + echo "${{ env.branch }}" > ./artifacts/branch.txt + echo "${{ env.original_run_id }}" > ./artifacts/run_id.txt + - name: Re-upload branch and run_id for subsequent workflows + uses: actions/upload-artifact@v3 + with: + name: branch-info-${{ github.run_id }} + path: | + ./artifacts/branch.txt + ./artifacts/run_id.txt + - name: Upload log files + if: always() + uses: actions/upload-artifact@v3 + with: + name: log-files + path: | + logs/ + /var/log/syslog* diff --git a/.github/workflows/sectest-docker-image.yml b/.github/workflows/sectest-docker-image.yml new file mode 100644 index 000000000..5777472bc --- /dev/null +++ 
b/.github/workflows/sectest-docker-image.yml @@ -0,0 +1,24 @@ +name: Security Testing Build + +# Build a multi-arch docker image for testing security updates to Swirl + +on: + push: + # only trigger on branches to security-testing, not on tags + branches: 'security-testing' + + +jobs: + + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: login to docker hub + run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin + - name: builder bootstrap + run: docker buildx create --name devBuilder --use --bootstrap + - name: Build the Docker image + run: docker buildx build -t swirlai/swirl-search:develop-security-testing --platform linux/amd64,linux/arm64 --push . diff --git a/.github/workflows/smoke-tests.yml b/.github/workflows/smoke-tests.yml new file mode 100644 index 000000000..86fad148c --- /dev/null +++ b/.github/workflows/smoke-tests.yml @@ -0,0 +1,81 @@ +name: SmokeTests + +on: + workflow_run: + workflows: [UnitTests] + types: + - completed + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + + build: + if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') + runs-on: ubuntu-latest + + steps: + - name: Download branch and run_id artifacts + uses: dawidd6/action-download-artifact@v2 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + workflow: unit-tests.yml + name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} + path: ./artifacts + continue-on-error: true # Allow the step to fail without stopping the workflow + - name: Determine branch for checkout + id: determine_branch + run: | + if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then + echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV + echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV + else + BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) + echo "branch=$BRANCH_NAME" >> $GITHUB_ENV + fi + - name: Print branch to be checked out + run: | + echo "Branch to checkout: ${{ env.branch }}" + - name: Checkout the code + uses: actions/checkout@v4 + with: + ref: ${{ env.branch }} + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' # caching pip stuff + - name: Run Install Swirl + run: ./install.sh + - name: Update apt + run: sudo apt -o Acquire::Retries=3 update + - name: upgrade Ubuntu to latest patches + run: sudo apt upgrade -y + - name: Run Install redis-server + run: sudo apt install -y redis-server + - name: Set up Swirl + run: python swirl.py setup + - name: Start up Swirl + run: python swirl.py start + - name: Run Smoke tests + run: docker run --net=host -t swirlai/swirl-testing:latest-smoke-test sh -c "behave **/docker_container/*.feature --tags=docker_api_smoke" + - name: Ensure artifacts directory exists and write branch and run_id again + run: | + mkdir -p ./artifacts + echo "${{ env.branch }}" > ./artifacts/branch.txt + echo "${{ env.original_run_id }}" > ./artifacts/run_id.txt + - name: Re-upload branch and run_id for subsequent workflows + uses: actions/upload-artifact@v3 + with: + name: branch-info-${{ github.run_id }} + path: | + ./artifacts/branch.txt + ./artifacts/run_id.txt + - name: Upload log files + if: always() + uses: actions/upload-artifact@v3 + with: + name: log-files + path: | + logs/ + /var/log/syslog* diff --git 
a/.github/workflows/spell-checker.yml b/.github/workflows/spell-checker.yml new file mode 100644 index 000000000..8390041bf --- /dev/null +++ b/.github/workflows/spell-checker.yml @@ -0,0 +1,18 @@ +name: Check Spelling + +# Only allow manual run of this workflow from the Actions tab +on: + workflow_dispatch: + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Check Spelling + uses: crate-ci/typos@master # From here: https://github.com/crate-ci/typos + with: + config: ./.github/workflows/typos.toml + write_changes: true # Writes changes on the Action's local checkout \ No newline at end of file diff --git a/.github/workflows/typos.toml b/.github/workflows/typos.toml new file mode 100644 index 000000000..19821a141 --- /dev/null +++ b/.github/workflows/typos.toml @@ -0,0 +1,8 @@ +# Exclude the following from spell checking + +[files] +extend-exclude = ["swirl/tests/"] + +[default.extend-identifiers] +# Don't correct this variable from scripts/fix_csv.py +fo = "fo" \ No newline at end of file diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml new file mode 100644 index 000000000..a1063cad6 --- /dev/null +++ b/.github/workflows/unit-tests.yml @@ -0,0 +1,57 @@ +name: UnitTests + +on: + push: + paths-ignore: + - '.github/**' + - 'README.md' + # Only trigger on branches, not on tags + branches: 'develop' + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +jobs: + + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout the code + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.11' + cache: 'pip' # caching pip stuff + - name: Run Install Swirl + run: ./install.sh + - name: Run Install Tests Swirl + run: ./install-test.sh + - name: Run pytest unit tests + run: pytest + - name: Create artifacts directory + run: mkdir -p artifacts + - name: Set branch name + id: extract_branch + run: | + BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) + echo "branch=$BRANCH_NAME" >> $GITHUB_ENV + - name: Write branch and run_id to file + run: | + echo "${{ env.branch }}" > ./artifacts/branch.txt + echo "${{ github.run_id }}" > ./artifacts/run_id.txt + - name: Upload branch and run_id files as artifact + uses: actions/upload-artifact@v3 + with: + name: branch-info-${{ github.run_id }} + path: | + ./artifacts/branch.txt + ./artifacts/run_id.txt + - name: Upload log files + if: always() + uses: actions/upload-artifact@v3 + with: + name: log-files + path: | + logs/ + /var/log/syslog* From 815f6f11573bc88a80009b5aa4904bb6c495700b Mon Sep 17 00:00:00 2001 From: Erik Spears <98238295+erikspears@users.noreply.github.com> Date: Fri, 3 Nov 2023 18:28:55 -0400 Subject: [PATCH 26/88] Revert "removing 2 workflows from develop" --- .../docker-image-spg-experimental.yml | 34 +++++++++++++++++++ .../workflows/docker-image-spg-preview.yml | 34 +++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 .github/workflows/docker-image-spg-experimental.yml create mode 100644 .github/workflows/docker-image-spg-preview.yml diff --git a/.github/workflows/docker-image-spg-experimental.yml b/.github/workflows/docker-image-spg-experimental.yml new file mode 100644 index 000000000..2f007598c --- /dev/null +++ b/.github/workflows/docker-image-spg-experimental.yml @@ -0,0 +1,34 @@ +name: EXPERIMENTAL Latest Spyglass Docker Build + +on: + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# For debugging +# on: +# 
push: +# branches: '' + +jobs: + + build: + + runs-on: ubuntu-latest + + steps: + - name: Checkout the code + uses: actions/checkout@v4 + - name: Build the Docker image + run: docker build --no-cache -t swirlai/spyglass:fork-x -f Dockerfile.fork.spg . + - name: login to docker hub + run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin + - name: Push the Docker image + run: docker push swirlai/spyglass:fork-x + - name: Upload log files + if: always() + uses: actions/upload-artifact@v3 + with: + name: log-files + path: | + logs/ + /var/log/syslog* diff --git a/.github/workflows/docker-image-spg-preview.yml b/.github/workflows/docker-image-spg-preview.yml new file mode 100644 index 000000000..0a3601fe3 --- /dev/null +++ b/.github/workflows/docker-image-spg-preview.yml @@ -0,0 +1,34 @@ +name: PREVIEW Latest Spyglass Docker Build + +on: + # Allows you to run this workflow manually from the Actions tab + workflow_dispatch: + +# For debugging +# on: +# push: +# branches: '' + +jobs: + + build: + + runs-on: ubuntu-latest + + steps: + - name: Checkout the code + uses: actions/checkout@v4 + - name: Build the Docker image + run: docker build --no-cache -t swirlai/spyglass:preview -f Dockerfile.develop.spg . + - name: login to docker hub + run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin + - name: Push the Docker image + run: docker push swirlai/spyglass:preview + - name: Upload log files + if: always() + uses: actions/upload-artifact@v3 + with: + name: log-files + path: | + logs/ + /var/log/syslog* From ffe858174cf04d542090db4ad520451451e3d33b Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Fri, 3 Nov 2023 18:30:09 -0400 Subject: [PATCH 27/88] fix up docker build workflow on develop --- .github/workflows/docker-image.yml | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index ba3f9f83d..25a33d658 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -1,11 +1,7 @@ name: DockerBuild on: - workflow_run: - workflows: [LatestSpyglassDockerBuild] - types: - - completed - # Allow manual run of this workflow from the Actions tab + # Allows manual run of this workflow from the Actions tab (on any branch) workflow_dispatch: # For debugging @@ -16,35 +12,11 @@ on: jobs: build: - if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') runs-on: ubuntu-latest steps: - - name: Download branch and run_id artifacts - uses: dawidd6/action-download-artifact@v2 - with: - github_token: ${{ secrets.GITHUB_TOKEN }} - workflow: docker-image-spg.yml - name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }} - path: ./artifacts - continue-on-error: true # Allow the step to fail without stopping the workflow - - name: Determine branch for checkout - id: determine_branch - run: | - if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then - echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV - echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV - else - BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3) - echo "branch=$BRANCH_NAME" >> $GITHUB_ENV - fi - - name: Print branch to be checked out - run: | - echo "Branch to checkout: ${{ env.branch }}" - name: Checkout the code uses: 
actions/checkout@v4
-        with:
-          ref: ${{ env.branch }}
       - name: login to docker hub
         run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin
       - name: builder bootstrap
         run: docker buildx create --name devBuilder --use --bootstrap

From 1cfeb44f112baf07a9b15d529046dccba93eceb5 Mon Sep 17 00:00:00 2001
From: Erik Spears
Date: Fri, 3 Nov 2023 18:36:21 -0400
Subject: [PATCH 28/88] also gussy up regular spyglass workflow in develop

---
 .github/workflows/docker-image-spg.yml | 40 --------------------------
 1 file changed, 40 deletions(-)

diff --git a/.github/workflows/docker-image-spg.yml b/.github/workflows/docker-image-spg.yml
index 6b4971f74..2bd0736a7 100644
--- a/.github/workflows/docker-image-spg.yml
+++ b/.github/workflows/docker-image-spg.yml
@@ -1,10 +1,6 @@
 name: LatestSpyglassDockerBuild
 
 on:
-  workflow_run:
-    workflows: [IntegrationAPITests]
-    types:
-      - completed
   # Allow manual run of this workflow from the Actions tab
   workflow_dispatch:
 
@@ -16,53 +12,17 @@ on:
 jobs:
 
   build:
-    if: (github.event_name == 'workflow_dispatch') || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success')
     runs-on: ubuntu-latest
 
     steps:
-      - name: Download branch and run_id artifacts
-        uses: dawidd6/action-download-artifact@v2
-        with:
-          github_token: ${{ secrets.GITHUB_TOKEN }}
-          workflow: integration-api-tests.yml
-          name: branch-info-${{ github.event_name == 'workflow_run' && github.event.workflow_run.id || github.run_id }}
-          path: ./artifacts
-        continue-on-error: true # Allow the step to fail without stopping the workflow
-      - name: Determine branch for checkout
-        id: determine_branch
-        run: |
-          if [[ -f ./artifacts/branch.txt && -f ./artifacts/run_id.txt ]]; then
-            echo "branch=$(cat ./artifacts/branch.txt)" >> $GITHUB_ENV
-            echo "original_run_id=$(cat ./artifacts/run_id.txt)" >> $GITHUB_ENV
-          else
-            BRANCH_NAME=$(echo $GITHUB_REF | cut -d "/" -f 3)
-            echo "branch=$BRANCH_NAME" >> $GITHUB_ENV
-          fi
-      - name: Print branch to be checked out
-        run: |
-          echo "Branch to checkout: ${{ env.branch }}"
       - name: Checkout the code
         uses: actions/checkout@v4
-        with:
-          ref: ${{ env.branch }}
       - name: Build the Docker image
         run: docker build -t swirlai/spyglass:latest -f Dockerfile.spg .
       - name: login to docker hub
         run: echo "${{ secrets.DOCKER_PASSWORD }}" | docker login -u "${{ secrets.DOCKER_USERNAME }}" --password-stdin
       - name: Push the Docker image
         run: docker push swirlai/spyglass
-      - name: Ensure artifacts directory exists and write branch and run_id again
-        run: |
-          mkdir -p ./artifacts
-          echo "${{ env.branch }}" > ./artifacts/branch.txt
-          echo "${{ env.original_run_id }}" > ./artifacts/run_id.txt
-      - name: Re-upload branch and run_id for subsequent workflows
-        uses: actions/upload-artifact@v3
-        with:
-          name: branch-info-${{ github.run_id }}
-          path: |
-            ./artifacts/branch.txt
-            ./artifacts/run_id.txt
       - name: Upload log files
         if: always()
         uses: actions/upload-artifact@v3

From df136592edb220c6380e799f79458659ffe8a42e Mon Sep 17 00:00:00 2001
From: Dmitriy Kostenko
Date: Sat, 4 Nov 2023 06:24:24 +0100
Subject: [PATCH 29/88] remove unnecessary string

---
 swirl_server/settings.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/swirl_server/settings.py b/swirl_server/settings.py
index 3977cc514..10072bef0 100644
--- a/swirl_server/settings.py
+++ b/swirl_server/settings.py
@@ -13,7 +13,6 @@
 from pathlib import Path
 import environ
 import os
-from drf_yasg.inspectors import CamelCaseJSONFilter, ReferencingSerializerInspector
 
 # Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent From f2b9715395cff50fbc74a90182020bc634cabb01 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Sat, 4 Nov 2023 06:33:01 +0100 Subject: [PATCH 30/88] reimplement openapi swagger --- requirements.txt | 3 ++- swirl/middleware.py | 23 ++++++++++++++++++++--- swirl/models.py | 2 +- swirl/templates/swagger-ui.html | 28 ---------------------------- swirl/urls.py | 27 +++++++++++++++------------ swirl/views.py | 3 +++ swirl_server/settings.py | 4 +++- 7 files changed, 44 insertions(+), 46 deletions(-) delete mode 100644 swirl/templates/swagger-ui.html diff --git a/requirements.txt b/requirements.txt index 42f46ff3a..c7034a2b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,4 +29,5 @@ readability-lxml tiktoken channels channels-redis -tika \ No newline at end of file +tika +drf-yasg \ No newline at end of file diff --git a/swirl/middleware.py b/swirl/middleware.py index 146caa154..9540b4a38 100644 --- a/swirl/middleware.py +++ b/swirl/middleware.py @@ -1,12 +1,13 @@ from rest_framework.authtoken.models import Token -from django.http import HttpResponseForbidden +from django.http import HttpResponseForbidden, HttpResponse from swirl.models import Search from swirl.authenticators import * from channels.middleware import BaseMiddleware from channels.db import database_sync_to_async from urllib.parse import parse_qs from django.core.exceptions import ObjectDoesNotExist - +import json +import yaml import jwt import logging as logger @@ -106,4 +107,20 @@ def get_search_by_id_and_user(self, search_id, user): try: return Search.objects.filter(pk=search_id, owner=user).exists() except ObjectDoesNotExist: - return None \ No newline at end of file + return None + +class SwaggerMiddleware: + def __init__(self, get_response): + self.get_response = get_response + + def __call__(self, request): + format = request.GET.get('format') + if '/swirl/swagger' in request.path and format and format == 'openapi': + response = self.get_response(request) + if response.status_code == 200: + openapi_data = json.loads(response.content) + yaml_content = yaml.dump(openapi_data, default_flow_style=False) + response = HttpResponse(yaml_content, content_type='text/yaml') + return response + return self.get_response(request) + return self.get_response(request) \ No newline at end of file diff --git a/swirl/models.py b/swirl/models.py index 837f3cf0f..971ae554d 100644 --- a/swirl/models.py +++ b/swirl/models.py @@ -245,7 +245,7 @@ class QueryTransform(models.Model) : ('synonym', 'Synonym' ), ('bag', 'Synonym Bag' ) ] - qrx_type = models.CharField(max_length=64, default='', choices=QUERY_TRASNSFORM_TYPE_CHOICES) + qrx_type = models.CharField(max_length=64, default='rewrite', choices=QUERY_TRASNSFORM_TYPE_CHOICES) config_content = models.TextField() class Meta: unique_together = [ diff --git a/swirl/templates/swagger-ui.html b/swirl/templates/swagger-ui.html deleted file mode 100644 index f726bab99..000000000 --- a/swirl/templates/swagger-ui.html +++ /dev/null @@ -1,28 +0,0 @@ - - - - Swirl Swagger - - - - - -
- - - - diff --git a/swirl/urls.py b/swirl/urls.py index bad2e94f2..47d82dbf4 100644 --- a/swirl/urls.py +++ b/swirl/urls.py @@ -6,11 +6,11 @@ # from webbrowser import get from django.urls import include, path -from django.views.generic import TemplateView -from rest_framework.schemas import get_schema_view -from rest_framework import routers +from rest_framework import routers, permissions from . import views from swirl.authenticators import Microsoft +from drf_yasg.views import get_schema_view +from drf_yasg import openapi router = routers.DefaultRouter() router.register(r'users', views.UserViewSet) @@ -25,16 +25,19 @@ router.register(r'sapi/authenticators', views.AuthenticatorViewSet, basename='galaxy-authenticators') router.register(r'sapi/searchproviders', views.SearchProviderViewSet, basename='galaxy-searchproviders'), +schema_view = get_schema_view( + openapi.Info( + title="Swirl Swagger", + default_version="v1", + description="Swirl API descriptions", + ), + public=True, + permission_classes=(permissions.AllowAny,), +) + urlpatterns = [ - path('openapi', get_schema_view( - title="Swirl Swagger", - description="Swirl API descriptions", - version="1.1.0" - ), name='openapi-schema'), - path('swagger-ui/', TemplateView.as_view( - template_name='swagger-ui.html', - extra_context={'schema_url':'openapi-schema'} - ), name='swagger-ui'), + path('swagger/', schema_view.with_ui(cache_timeout=0), + name='schema-swagger-ui'), path('query_transform_form/', views.query_transform_form, name='query_transform_form'), # this appears to be necessary to access the view from a pytest API unit test diff --git a/swirl/views.py b/swirl/views.py index 7c476fe79..e9742c1c9 100644 --- a/swirl/views.py +++ b/swirl/views.py @@ -170,6 +170,9 @@ class Meta: class AuthenticatorViewSet(viewsets.ModelViewSet): serializer_class = AuthenticatorSerializer + def get_queryset(self): + return AuthenticatorModel.objects.all() + def list(self, request): return return_authenticators_list(request) diff --git a/swirl_server/settings.py b/swirl_server/settings.py index a097bf824..10072bef0 100644 --- a/swirl_server/settings.py +++ b/swirl_server/settings.py @@ -50,7 +50,8 @@ 'django.contrib.contenttypes', 'django.contrib.sessions', 'django.contrib.messages', - 'django.contrib.staticfiles' + 'django.contrib.staticfiles', + 'drf_yasg' ] ASGI_APPLICATION = 'swirl_server.routing.application' @@ -69,6 +70,7 @@ 'django.contrib.auth.middleware.AuthenticationMiddleware', 'swirl.middleware.TokenMiddleware', 'swirl.middleware.SpyglassAuthenticatorsMiddleware', + 'swirl.middleware.SwaggerMiddleware', 'django.contrib.messages.middleware.MessageMiddleware', 'django.middleware.clickjacking.XFrameOptionsMiddleware', ] From 39bc4785c9b646722321008cbb214a9d748cbb3e Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Sat, 4 Nov 2023 06:44:53 +0100 Subject: [PATCH 31/88] upgrade django to 4.2.7 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 42f46ff3a..ae21496bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ requests -django +Django==4.2.7 django_restframework django-celery-beat django-rest-swagger From 3369b594972ff9d51166243fc456a9df9cce2e19 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Sat, 4 Nov 2023 15:15:39 -0400 Subject: [PATCH 32/88] update Swagger URL on Django homepage --- swirl/templates/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swirl/templates/index.html b/swirl/templates/index.html index 
f15e41097..d7cac78b3 100644
--- a/swirl/templates/index.html
+++ b/swirl/templates/index.html
@@ -11,7 +11,7 @@
 ⟶ QueryTransforms
 Search
 Results
-⟶ <a href="swagger-ui/">Swagger</a>
+⟶ <a href="swagger/">Swagger</a>
 Utilities:
 Admin
 Documentation

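
The SwaggerMiddleware added in patch 30 above does one thing: when a request under /swirl/swagger carries format=openapi and the schema view returns 200, it parses the JSON body and re-serializes it as YAML. The conversion in isolation looks like the sketch below; the sample spec dict is invented for illustration, while PyYAML itself is a real dependency that the middleware already imports.

import json
import yaml

# Invented minimal spec, standing in for the drf-yasg response body.
sample_content = json.dumps(
    {"swagger": "2.0", "info": {"title": "Swirl Swagger", "version": "v1"}, "paths": {}}
)
openapi_data = json.loads(sample_content)                         # parse the JSON schema
yaml_content = yaml.dump(openapi_data, default_flow_style=False)  # same call the middleware makes
print(yaml_content)                                               # served with content_type='text/yaml'
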
From c5626a9bef31edcd9e35acc1f983656e1a11c814 Mon Sep 17 00:00:00 2001 From: Sid Date: Sat, 4 Nov 2023 17:03:39 -0400 Subject: [PATCH 33/88] Various fixes, Search QS and Q tested --- Swirl.postman_collection.json | 71 ++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/Swirl.postman_collection.json b/Swirl.postman_collection.json index feede53de..158ce1759 100644 --- a/Swirl.postman_collection.json +++ b/Swirl.postman_collection.json @@ -1,12 +1,13 @@ { "info": { - "_postman_id": "a50db930-dcad-49b1-a3b4-8e3fc0237908", + "_postman_id": "c95e9c5d-7f1e-497d-8331-b827f3535aef", "name": "Swirl", - "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json" + "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", + "_exporter_id": "30951883" }, "item": [ { - "name": "SearchProviders", + "name": "Create SearchProvider", "request": { "method": "POST", "header": [ @@ -34,7 +35,7 @@ "response": [] }, { - "name": "SearchProviders", + "name": "List SearchProviders", "request": { "method": "GET", "header": [ @@ -58,7 +59,7 @@ "response": [] }, { - "name": "Search", + "name": "Create Search", "request": { "method": "POST", "header": [ @@ -68,6 +69,10 @@ "type": "default" } ], + "body": { + "mode": "raw", + "raw": "{\n \"query_string\": \"swirl metasearch\"\n}" + }, "url": { "raw": "{{baseUrl}}swirl/search/", "host": [ @@ -82,7 +87,7 @@ "response": [] }, { - "name": "Search", + "name": "Search QS", "request": { "method": "GET", "header": [ @@ -93,20 +98,66 @@ } ], "url": { - "raw": "{{baseUrl}}swirl/search/", + "raw": "{{baseUrl}}swirl/search/?qs=swirl+metasearch", + "host": [ + "{{baseUrl}}swirl" + ], + "path": [ + "search", + "" + ], + "query": [ + { + "key": "qs", + "value": "swirl+metasearch" + }, + { + "key": "providers", + "value": "2,3", + "disabled": true + } + ] + } + }, + "response": [] + }, + { + "name": "Search Q", + "request": { + "method": "GET", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "default" + } + ], + "url": { + "raw": "{{baseUrl}}swirl/search/?q=swirl+metasearch", "host": [ "{{baseUrl}}swirl" ], "path": [ "search", "" + ], + "query": [ + { + "key": "q", + "value": "swirl+metasearch" + }, + { + "key": "providers", + "value": "2,3", + "disabled": true + } ] } }, "response": [] }, { - "name": "Results", + "name": "Results by search_id", "request": { "method": "GET", "header": [ @@ -117,7 +168,7 @@ } ], "url": { - "raw": "{{baseUrl}}swirl/results?search_id=59", + "raw": "{{baseUrl}}swirl/results?search_id=1", "host": [ "{{baseUrl}}swirl" ], @@ -127,7 +178,7 @@ "query": [ { "key": "search_id", - "value": "59" + "value": "1" } ] } From 059b43158d2cdcb83d6f6d24541f337516421beb Mon Sep 17 00:00:00 2001 From: Sid Date: Sat, 4 Nov 2023 17:26:07 -0400 Subject: [PATCH 34/88] More updates/fixes, MS PA generated swagger file added --- Swirl-Metasearch.swagger.json | 180 ++++++++++++++++++++++++++++++++++ Swirl.postman_collection.json | 3 +- 2 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 Swirl-Metasearch.swagger.json diff --git a/Swirl-Metasearch.swagger.json b/Swirl-Metasearch.swagger.json new file mode 100644 index 000000000..ecb09753b --- /dev/null +++ b/Swirl-Metasearch.swagger.json @@ -0,0 +1,180 @@ +{ + "swagger": "2.0", + "info": { + "version": "1.0.0", + "title": "Swirl", + "description": "Swirl is a metasearch engine that uses AI to simultaneously re-rank results and then optionally prompt Generative AI - enabling you to get 
answers based on your own data." + }, + "host": "search.swirl.today", + "basePath": "/swirl/", + "schemes": [ + "https" + ], + "consumes": [], + "produces": [ + "application/json" + ], + "paths": { + "/searchproviders/": { + "get": { + "summary": "SearchProviders List", + "description": "SearchProviders List", + "operationId": "SearchprovidersList", + "parameters": [ + { + "name": "Content-Type", + "in": "header", + "required": true, + "type": "string", + "default": "application/json", + "description": "Content-Type" + } + ], + "responses": { + "default": { + "description": "default", + "schema": {} + } + } + }, + "post": { + "summary": "SearchProviders Create", + "description": "SearchProviders Create", + "operationId": "SearchprovidersCreate", + "parameters": [ + { + "name": "Content-Type", + "in": "header", + "required": true, + "type": "string", + "default": "application/json", + "description": "Content-Type" + }, + { + "name": "body", + "in": "body", + "schema": { + "type": "string" + }, + "required": true + } + ], + "responses": { + "default": { + "description": "default", + "schema": {} + } + } + } + }, + "/search/": { + "get": { + "summary": "SearchQS", + "description": "SearchQS", + "operationId": "SearchQS", + "parameters": [ + { + "name": "qs", + "default": "swirl metasearch", + "in": "query", + "type": "string", + "required": true + }, + { + "name": "Content-Type", + "in": "header", + "required": true, + "type": "string", + "default": "application/json", + "description": "Content-Type" + } + ], + "responses": { + "default": { + "description": "default", + "schema": {} + } + }, + "x-ms-visibility": "important" + }, + "post": { + "summary": "Search Create", + "description": "Search Create", + "operationId": "SearchCreate", + "parameters": [ + { + "name": "Content-Type", + "in": "header", + "required": true, + "type": "string", + "default": "application/json", + "description": "Content-Type" + }, + { + "name": "body", + "in": "body", + "schema": { + "type": "object", + "properties": { + "query_string": { + "type": "string", + "description": "query_string" + } + }, + "default": { + "query_string": "swirl metasearch" + } + }, + "required": true + } + ], + "responses": { + "default": { + "description": "default", + "schema": {} + } + } + } + }, + "/results": { + "get": { + "summary": "Results", + "description": "Results", + "operationId": "Results", + "parameters": [ + { + "name": "search_id", + "default": "1", + "in": "query", + "type": "string", + "required": true + }, + { + "name": "Content-Type", + "in": "header", + "required": true, + "type": "string", + "default": "application/json", + "description": "Content-Type" + } + ], + "responses": { + "default": { + "description": "default", + "schema": {} + } + } + } + } + }, + "definitions": {}, + "parameters": {}, + "responses": {}, + "securityDefinitions": { + "undefined": { + "type": "basic" + } + }, + "security": [], + "tags": [] +} \ No newline at end of file diff --git a/Swirl.postman_collection.json b/Swirl.postman_collection.json index 158ce1759..960873d26 100644 --- a/Swirl.postman_collection.json +++ b/Swirl.postman_collection.json @@ -3,7 +3,8 @@ "_postman_id": "c95e9c5d-7f1e-497d-8331-b827f3535aef", "name": "Swirl", "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", - "_exporter_id": "30951883" + "_exporter_id": "30951883", + "_collection_link": 
"https://winter-robot-24235.postman.co/workspace/My-Workspace~035521da-9255-4685-8e34-5b3c78679e48/collection/30951883-c95e9c5d-7f1e-497d-8331-b827f3535aef?action=share&creator=30951883&source=collection_link" }, "item": [ { From 4840e97df685723ff7fb7146a1b6b915ec268a0a Mon Sep 17 00:00:00 2001 From: Sid Date: Sat, 4 Nov 2023 17:58:38 -0400 Subject: [PATCH 35/88] Added more methods --- Swirl.postman_collection.json | 92 ++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) diff --git a/Swirl.postman_collection.json b/Swirl.postman_collection.json index 960873d26..03775380a 100644 --- a/Swirl.postman_collection.json +++ b/Swirl.postman_collection.json @@ -20,7 +20,7 @@ ], "body": { "mode": "raw", - "raw": "" + "raw": "{\n \"name\": \"Swirl Documentation TEST - Google PSE\",\n \"active\": false,\n \"default\": true,\n \"authenticator\": \"\",\n \"connector\": \"RequestsGet\",\n \"url\": \"https://www.googleapis.com/customsearch/v1\",\n \"query_template\": \"{url}?cx={cx}&key={key}&q={query_string}\",\n \"post_query_template\": {},\n \"http_request_headers\": {},\n \"page_fetch_config_json\": {\n \"cache\": \"false\",\n \"headers\": {\n \"User-Agent\": \"Swirlbot/1.0 (+http://swirl.today)\"\n },\n \"timeout\": 10\n },\n \"query_processors\": [\n \"AdaptiveQueryProcessor\"\n ],\n \"query_mappings\": \"cx=f047885d9696f447c,DATE_SORT=sort=date,PAGE=start=RESULT_INDEX,NOT_CHAR=-\",\n \"result_grouping_field\": \"\",\n \"result_processors\": [\n \"MappingResultProcessor\",\n \"DateFinderResultProcessor\",\n \"CosineRelevancyResultProcessor\"\n ],\n \"response_mappings\": \"FOUND=searchInformation.totalResults,RETRIEVED=queries.request[0].count,RESULTS=items\",\n \"result_mappings\": \"title=title,url=link,body=snippet,author=displayLink,cacheId,pagemap.metatags[*].['og:type'],pagemap.metatags[*].['og:site_name'],pagemap.metatags[*].['og:description'],NO_PAYLOAD\",\n \"results_per_query\": 10,\n \"credentials\": \"key=AIzaSyDvVeE-L6nCC9u-TTGuhggvSmzhtiTHJsA\",\n \"eval_credentials\": \"\",\n \"tags\": [\n \"Swirl\",\n \"Documentation\"\n ]\n }" }, "url": { "raw": "{{baseUrl}}swirl/searchproviders/", @@ -59,6 +59,31 @@ }, "response": [] }, + { + "name": "List SearchProvider", + "request": { + "method": "GET", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "default" + } + ], + "url": { + "raw": "{{baseUrl}}swirl/searchproviders/1/", + "host": [ + "{{baseUrl}}swirl" + ], + "path": [ + "searchproviders", + "1", + "" + ] + } + }, + "response": [] + }, { "name": "Create Search", "request": { @@ -87,6 +112,69 @@ }, "response": [] }, + { + "name": "List Searches", + "protocolProfileBehavior": { + "disableBodyPruning": true + }, + "request": { + "method": "GET", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "default" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"query_string\": \"swirl metasearch\"\n}" + }, + "url": { + "raw": "{{baseUrl}}swirl/search/", + "host": [ + "{{baseUrl}}swirl" + ], + "path": [ + "search", + "" + ] + } + }, + "response": [] + }, + { + "name": "List Search", + "protocolProfileBehavior": { + "disableBodyPruning": true + }, + "request": { + "method": "GET", + "header": [ + { + "key": "Content-Type", + "value": "application/json", + "type": "default" + } + ], + "body": { + "mode": "raw", + "raw": "{\n \"query_string\": \"swirl metasearch\"\n}" + }, + "url": { + "raw": "{{baseUrl}}swirl/search/1/", + "host": [ + "{{baseUrl}}swirl" + ], + "path": [ + "search", + "1", + "" + ] 
+ } + }, + "response": [] + }, { "name": "Search QS", "request": { @@ -158,7 +246,7 @@ "response": [] }, { - "name": "Results by search_id", + "name": "List Results by search_id", "request": { "method": "GET", "header": [ From fd5e779b55d69e554a43300135fb4f7b0e74bb86 Mon Sep 17 00:00:00 2001 From: Sid Date: Sat, 4 Nov 2023 18:23:30 -0400 Subject: [PATCH 36/88] Moar updates, added documentation & logo --- Swirl.postman_collection.json | 1 + 1 file changed, 1 insertion(+) diff --git a/Swirl.postman_collection.json b/Swirl.postman_collection.json index 03775380a..92c636513 100644 --- a/Swirl.postman_collection.json +++ b/Swirl.postman_collection.json @@ -2,6 +2,7 @@ "info": { "_postman_id": "c95e9c5d-7f1e-497d-8331-b827f3535aef", "name": "Swirl", + "description": "\n\n# Swirl Documentation\n\nThank you for taking a look at Swirl! For more information, visit our documention site:\n\n- [Overview](https://docs.swirl.today/)\n- [Quick Start](https://docs.swirl.today/Quick-Start)\n- [User Guide](https://docs.swirl.today/User-Guide)\n- [Admin Guide](https://docs.swirl.today/Admin-Guide)\n- [M365 Guide](https://docs.swirl.today/M365-Guide)\n- [Developer Guide](https://docs.swirl.today/Developer-Guide)\n- [Developer Reference](https://docs.swirl.today/Developer-Reference)\n- [AI Guide](https://docs.swirl.today/AI-Guide)\n \n\nFor more information, please visit:\n\n- [www.swirl.today](https://swirl.today)\n- [github.com/swirlai/swirl-search](https://github.com/swirlai/swirl-search)\n \n\nFor support. please:\n\n- Join the [Swirl Community on Slack](https://join.slack.com/t/swirlmetasearch/shared_invite/zt-1qk7q02eo-kpqFAbiZJGOdqgYVvR1sfw) and visit the #support channel\n- Email: [support@swirl.today](mailto:support@swirl.today)", "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", "_exporter_id": "30951883", "_collection_link": "https://winter-robot-24235.postman.co/workspace/My-Workspace~035521da-9255-4685-8e34-5b3c78679e48/collection/30951883-c95e9c5d-7f1e-497d-8331-b827f3535aef?action=share&creator=30951883&source=collection_link" From 7704df317edd257a18643d73bb1bb3bf388635d3 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Mon, 6 Nov 2023 11:28:58 -0500 Subject: [PATCH 37/88] cherry-pick badge fixes on Readme from main --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 3f4e31a2c..a3d32e04c 100644 --- a/README.md +++ b/README.md @@ -17,8 +17,8 @@ [![GitHub Release](https://img.shields.io/github/v/release/swirlai/swirl-search?style=flat-square&color=8DDFCB&label=Release)](https://github.com/swirlai/swirl-search/releases) [![Docker Build](https://github.com/swirlai/swirl-search/actions/workflows/docker-image.yml/badge.svg?style=flat-square&branch=main)](https://github.com/swirlai/swirl-search/actions/workflows/docker-image.yml) [![Tests](https://github.com/swirlai/swirl-search/actions/workflows/smoke-tests.yml/badge.svg?branch=main)](https://github.com/swirlai/swirl-search/actions/workflows/smoke-tests.yml) -[![Slack](https://custom-icon-badges.demolab.com/badge/Join%20Our%20Slack-black?style=flat-square&logo=slack&color=0E21A0&logoColor=white)](https://join.slack.com/t/swirlmetasearch/shared_invite/zt-1qk7q02eo-kpqFAbiZJGOdqgYVvR1sfw) -[![Website](https://custom-icon-badges.demolab.com/badge/www.swirl.today-black?style=flat-square&logo=globe&color=241468&logoColor=white)](https://www.swirl.today) +[![Static 
Badge](https://img.shields.io/badge/Join%20Our%20Slack-0E21A0?logo=slack)](https://join.slack.com/t/swirlmetasearch/shared_invite/zt-1qk7q02eo-kpqFAbiZJGOdqgYVvR1sfw) +[![Website](https://img.shields.io/badge/Swirl.Today-241468)](https://www.swirl.today)
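
For reference alongside the Postman collection from patches 33 through 36, the same core request flows in Python. The base URL, the token header, and the polling note are illustrative assumptions, not details taken from this patch set.

import requests

BASE = "http://localhost:8000/swirl/"         # assumed local dev server
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": "Token YOUR-API-TOKEN",  # placeholder credential
}

# "Search QS": run a search and get ranked results back in a single call.
qs = requests.get(BASE + "search/", params={"qs": "swirl metasearch"}, headers=HEADERS)
qs.raise_for_status()

# "Create Search" then "List Results by search_id": the two-step flow.
created = requests.post(BASE + "search/", json={"query_string": "swirl metasearch"}, headers=HEADERS)
created.raise_for_status()
search_id = created.json()["id"]
# In practice, poll until the search status reports completion before reading.
results = requests.get(BASE + "results", params={"search_id": search_id}, headers=HEADERS)
print(results.json())
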
From 04547a649c44ad15ce0d53ddcf46ce3f6e46eb57 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Tue, 7 Nov 2023 16:18:12 +0100 Subject: [PATCH 38/88] fix bug with undefined search variable --- swirl/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swirl/views.py b/swirl/views.py index e9742c1c9..5ce394e08 100644 --- a/swirl/views.py +++ b/swirl/views.py @@ -590,10 +590,10 @@ def create(self, request): if Search.objects.filter(id=serializer.data['id'], owner=self.request.user).exists(): search = Search.objects.get(id=serializer.data['id']) search.status = 'ERR_NO_SEARCHPROVIDERS' - search.save + search.save() else: # search_task.delay(serializer.data['id'], Authenticator().get_session_data(request)) - logger.info(f"{request.user} search_post {search.id}") + logger.info(f"{request.user} search_post") run_search(serializer.data['id'], Authenticator().get_session_data(request), request=request) return Response(serializer.data, status=status.HTTP_201_CREATED) From c82a0731e423cdd2cc677aff01e9758e87e93254 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Tue, 7 Nov 2023 10:19:25 -0500 Subject: [PATCH 39/88] fix 2 GH SP problems --- SearchProviders/github.json | 4 ++-- SearchProviders/preloaded.json | 4 ++-- db.sqlite3.dist | Bin 294912 -> 294912 bytes 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/SearchProviders/github.json b/SearchProviders/github.json index b560961f9..2e82f94d5 100644 --- a/SearchProviders/github.json +++ b/SearchProviders/github.json @@ -37,7 +37,7 @@ "default": false, "connector": "RequestsGet", "url": "https://api.github.com/search/issues", - "query_template": "{url}?q={query_string}%2Bis%3Aissue", + "query_template": "{url}?q={query_string}+is%3Aissuee", "post_query_template": "{}", "http_request_headers": { "Accept": "application/vnd.github.text-match+json" @@ -69,7 +69,7 @@ "default": false, "connector": "RequestsGet", "url": "https://api.github.com/search/issues", - "query_template": "{url}?q={query_string}%2Bis%3Apull-request", + "query_template": "{url}?q={query_string}+is%3Apull-request", "post_query_template": "{}", "http_request_headers": { "Accept": "application/vnd.github.text-match+json" diff --git a/SearchProviders/preloaded.json b/SearchProviders/preloaded.json index 80d22b62a..818b1633a 100644 --- a/SearchProviders/preloaded.json +++ b/SearchProviders/preloaded.json @@ -557,7 +557,7 @@ "default": false, "connector": "RequestsGet", "url": "https://api.github.com/search/issues", - "query_template": "{url}?q={query_string}%2Bis%3Aissue", + "query_template": "{url}?q={query_string}+is%3Aissue", "post_query_template": "{}", "http_request_headers": { "Accept": "application/vnd.github.text-match+json" @@ -589,7 +589,7 @@ "default": false, "connector": "RequestsGet", "url": "https://api.github.com/search/issues", - "query_template": "{url}?q={query_string}%2Bis%3Apull-request", + "query_template": "{url}?q={query_string}+is%3Apull-request", "post_query_template": "{}", "http_request_headers": { "Accept": "application/vnd.github.text-match+json" diff --git a/db.sqlite3.dist b/db.sqlite3.dist index d0b22dc2a6c5166600e4ca963a83222876219221..f113b3b94e01e2f542cdbd20df32c114e3ad4eeb 100644 GIT binary patch delta 3681 zcmai$O>9(E6vt=UTSNkl&~}E-OeuvHQfeRf`=d(2wA2ozP@x4RhDa%dnTn;@RtiR~ zL@+90a~E%=h8PLa#1D*Kjfo!%U6?2&e#FGMF)=I*3pB=s8oh5`Mc#ccx}D$r&pG#; z|2^-{&##!DU-8ClVzMG(rJYpiyA zHSbrMwHc>0E{GzfHa(Te>Gj!M3{%W_X>}DJ&*mB&HB(&Z7qZGd4I+?o{Zm%C@u0#; zlv-@OW<9bpAH$qcS}IiW^9dL+&P(|!z81q!QKDceSH-9~?-T{4;7IBN&0CzTP;iQH 
zX=PRSWwTop6bwRD%2shpE{+MuqLiuPBe}dY11Rk1<)$V(gARQon^}tjmmqHKc*SLTYNSHp8=P>{grus_Wa{=X$3cb95 zVt`P|^_~KA0a278{bGT}0F#91YX#*v1`@=XCN0nvU}dY)ClY|9lIZtaV%1_QAk}}i z#Hz&zA*Op*#SV@HWJI4!09aZdf3r%sRY8ua)NQRBpK#`6TT2P7txIJ)ierZ!s!T!% z|6{V)Q%VuRH`dgpnyg}Wru_c`rVteAx>QF;aa><%ZHW|TBTB_NxxD(h;oezzknbEn zrH;y4Vh-ckLn5ddX$Qlu zh$+HYLZ_)05W&sWbzqw-qKsfAk}_Fh`)9Uxg74DK5$*4sCyD|hsoB*9cDR|4z(gdT zDIgM?FS^0$Xfv6|gtE?!mlqPO%;O0B7A=o?_S+(@>7q;9nmr^6nwJO|ik8PbtdvA5 zNfZzg+RfOp9dc{5Upm1Oy1~4Fhy|xvWx>CC7Xap01$b@bmM{<6zb0o72=PBwV2!bM zIahPX`P8x>mwHP^7H8@%)i$Tj>#;lL{AV`<6$#1h&>>DLDX;E#0u z(rh7ByuDalym;B#-TVFaJ^&+Q=N`ovC-t>T2&_80(|@%O6w+Phvp(>By8gjTAw~WP zI_$sQ4?3No*iP8GJvJ=v1%#2{x`aWObXBH1NU<_KJ?p=D7 zAB?ecGcO>DnLRter%|g3Yf}5@NLXqiq19$JnEkszchq3QT2V-zs0@hopX>vT!7?2* zO?v=}UJhaHDOl3yBP2rlukQ!gT)YpIqe^bKAtm<77CUB0I181N_k#vkDTsZLC5(dG z?Fj9HG&Uy(5z|XMd1hbm@pK^};W6!1Tx9M%=q`O>r_UKmm`O;;zpHzZZ{-_I|6%Y$ zv}|EVE}(s8#ts}3w(#L7m%(FEM+iH2E@Z;43yI>UVFLW@itJujNsB|+)yWm%h8zV~ zToEPQCQGU@AOsmW3BCo9iz>aq;xyr+hGgMj;rICRG?3B0NRKhj>?2`qB1Os?4bGr+aQOx-5*=>dBQBoGw z=XLqRokbU8t+lL`8u-No%nc#6Vgp}`VPQ1JyjEyn(p7Yl5{4Rz{li^boTx%Grg5#a zq5G`MO-g7=7_H?SxLk;1tU0aa8uKyu8ef2Js7fKZP8D@D%*LSxPRWhD{= zU{39imP8CtEwKM`iMxPn1>joAIF1oqfMZx0#{?7_&VM)wP^J8L%CT&j0c-kymSfp6 zsj>72R>UsO7{kgxp8zn18h*J#xmi&bUZZtbtkP|%N~L%8H9s>#vDiSy#>Ali z$%8AeKc8`rGYJ(wJ#%&@eRihh?U~jaE_l~fK?sjS5eW2@xH9`AUl($36&?9LP zRZ_T`#7WAKD7txSd~VyGk>Kz;R0^^k=tn0q;aosRBt#C0<`!qr<(9r^P>c=JI!OeC zlOR2bGQn$QRJRxNXuc&9l6zV)MxZ<0%Qgw4!3$+{%6?QpuSVl?PccP6m!J#@Z^X3_ zx6m3%SbCHarC40pBs%!epw+>nn^1>c-;I8cCM-RyB^V@09TG|H@LIGxDiWR%#F#MK z=xBl`HG*PH?bRN%(-mp(HsML4kWg;7_oA=Uk$MVGY9?etdQGB<-M0blaw7?@8BE)R z^g?2U{j?9AZjD5eo;DKtJ1%Mx!tE0T{T5A6dP>5kN+?8=aB5#+Xej#9(vwsQmf(>l z;o*Z}J0wxfO_4(vnFK#e4+z0DC`;5IT)hn;`>R3Hl|vv zYRPwty_<#?<=QT`W>e?<@uhRY^LHQ>S&FGtS`>|Xp#r`>QIIx4L z*EvxrB2|JFlot}s8z(M?6YH}TR;|y@&IfN!qDpY6iVg%z>*!|t{Z=%Qjqk#3ki7V^@E7oi+7_bS4oJLNlw;YNWjSr<@5p65mhSBXhxEjQIjxcK`BwhM8;!` zE1fW!o5VC4r_0`YFAQ}@3B!;iJ)QAFVpQ02T>&W9+rdfnZFEvVuR%M&ABUW&i*H From c09e976bfc2d454625e02b93c7ad5e3b784b9535 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Tue, 7 Nov 2023 16:29:54 +0100 Subject: [PATCH 40/88] increase token length --- swirl/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swirl/models.py b/swirl/models.py index 971ae554d..c01e9c2fb 100644 --- a/swirl/models.py +++ b/swirl/models.py @@ -41,8 +41,8 @@ class Authenticator(models.Model): class OauthToken(models.Model): owner = models.ForeignKey('auth.User', on_delete=models.CASCADE) idp = models.CharField(max_length=32, default='Microsoft') - token = models.CharField(max_length=2048) - refresh_token = models.CharField(max_length=2048, blank=True, null=True) + token = models.CharField(max_length=4096) + refresh_token = models.CharField(max_length=4096, blank=True, null=True) class Meta: unique_together = [['owner', 'idp']] From 0a9ce27fc16c47fd45a6c0a6114ffcde5702eab7 Mon Sep 17 00:00:00 2001 From: dnicodemus-la Date: Tue, 7 Nov 2023 11:10:16 -0500 Subject: [PATCH 41/88] tmp skip for now --- swirl/tests/tests.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/swirl/tests/tests.py b/swirl/tests/tests.py index b07be0d8f..398bfbd7f 100644 --- a/swirl/tests/tests.py +++ b/swirl/tests/tests.py @@ -479,7 +479,7 @@ def test_aqp(aqp_test_cases, aqp_test_expected): assert actual == aqp_test_expected[i] i = i + 1 -@pytest.mark.django_db +@pytest.mark.skip(reason="tmp for cgpt") def test_cgptqp_1(): tc = 'gig economy' expected = 'gig economy' @@ -510,7 +510,7 @@ def test_cgptqp_1(): ], 
temperature=0)

-@pytest.mark.django_db
+@pytest.mark.skip(reason="tmp for cgpt")
 def test_cgptqp_2():
     tc = 'gig economy'
     expected = 'gig economy'
@@ -542,7 +542,7 @@ def test_cgptqp_2():
         ],
         temperature=0)

-@pytest.mark.django_db
+@pytest.mark.skip(reason="tmp for cgpt")
 def test_cgptqp_3():
     tc = 'gig economy'
     expected = 'gig economy'
@@ -574,7 +574,7 @@ def test_cgptqp_3():
         ],
         temperature=0)

-@pytest.mark.django_db
+@pytest.mark.skip(reason="tmp for cgpt")
 def test_cgptqp_4():
     tc = 'gig economy'
     expected = 'gig economy'
@@ -608,7 +608,7 @@ def test_cgptqp_4():
         temperature=0)


-@pytest.mark.django_db
+@pytest.mark.skip(reason="tmp for cgpt")
 def test_cgptqp_5():
     tc = 'gig economy'
     expected = 'Gig economy large scale economics'
@@ -644,7 +644,7 @@ def test_cgptqp_5():
         temperature=0)


-@pytest.mark.django_db
+@pytest.mark.skip(reason="tmp for cgpt")
 def test_cgptqp_6():
     tc = 'gig economy'
     expected = 'gig economy'
@@ -680,7 +680,7 @@ def test_cgptqp_6():
         temperature=0)


-@pytest.mark.django_db
+@pytest.mark.skip(reason="tmp for cgpt")
 def test_cgptqp_7():
     tc = 'gig economy'
     expected = 'gig economy'

From 2dc55bac5b4468a34d38acdbb61e19cc980b9572 Mon Sep 17 00:00:00 2001
From: dnicodemus-la
Date: Tue, 7 Nov 2023 11:37:42 -0500
Subject: [PATCH 42/88] check for micro time and convert

---
 swirl/processors/utils.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/swirl/processors/utils.py b/swirl/processors/utils.py
index 917f629ae..94e16fbf2 100644
--- a/swirl/processors/utils.py
+++ b/swirl/processors/utils.py
@@ -769,11 +769,24 @@ def _date_float_parse_to_timestamp(s):
             logger.debug(f'{x} : unable to convert {s} as float to timestamp')
     return ret

+def try_micro_conversion(date_str):
+    try:
+        if date_str.isdigit() and len(date_str) >= 13:
+            ret_date_str = f'{int(date_str)/1000}'
+            return ret_date_str
+        else:
+            return date_str
+    except Exception as err:
+        logger.debug(f'micro conversion failed {err}')
+        return date_str
+
+
 def date_str_to_timestamp(s):
     """
     Convert the input to a string and try to make a timestamp from it using known
     string formats and raw numeric values
     """
+    s = try_micro_conversion(s)
     ret = _date_str_parse_to_timestamp(s)
     if not ret:
         ret = _date_float_parse_to_timestamp(s)
     if not ret:

From e885516bfdb9bdc05c3856851ecfb1e35649d318 Mon Sep 17 00:00:00 2001
From: Erik Spears
Date: Tue, 7 Nov 2023 12:05:53 -0500
Subject: [PATCH 43/88] fix typo in GH SP JSON only

---
 SearchProviders/github.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/SearchProviders/github.json b/SearchProviders/github.json
index 2e82f94d5..193179278 100644
--- a/SearchProviders/github.json
+++ b/SearchProviders/github.json
@@ -37,7 +37,7 @@
     "default": false,
     "connector": "RequestsGet",
     "url": "https://api.github.com/search/issues",
-    "query_template": "{url}?q={query_string}+is%3Aissuee",
+    "query_template": "{url}?q={query_string}+is%3Aissue",
     "post_query_template": "{}",
     "http_request_headers": {
       "Accept": "application/vnd.github.text-match+json"

From db88c7337f3dddade0283c57da3307a6238fefae Mon Sep 17 00:00:00 2001
From: Erik Spears
Date: Tue, 7 Nov 2023 12:51:16 -0500
Subject: [PATCH 44/88] update version on develop

---
 swirl/banner.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/swirl/banner.py b/swirl/banner.py
index 6b6d929e3..87aa8cd82 100644
--- a/swirl/banner.py
+++ b/swirl/banner.py
@@ -10,9 +10,9 @@ class bcolors:
     ENDC = '\033[0m'
     BOLD = '\033[1m'

-SWIRL_VERSION = '3.0.0'
+SWIRL_VERSION = '3.1.0'

-SWIRL_BANNER_TEXT = "__S_W_I_R_L__3_._0_._0__________________________________________________________"
+SWIRL_BANNER_TEXT = "__S_W_I_R_L__3_._1_._0__________________________________________________________"

 SWIRL_BANNER = f'{bcolors.BOLD}{SWIRL_BANNER_TEXT}{bcolors.ENDC}'

 #############################################

From f49e7005acf6d27974717291e2aa55c57d67f794 Mon Sep 17 00:00:00 2001
From: dnicodemus-la
Date: Tue, 7 Nov 2023 16:32:10 -0500
Subject: [PATCH 45/88] pin openai for now

---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 0df6e1261..bc23d2702 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,7 +19,7 @@ nltk
 bs4
 google-cloud-bigquery
 opensearch-py
-openai
+openai==0.28.1
 msal
 PyJWT
 pyahocorasick
@@ -30,4 +30,4 @@ tiktoken
 channels
 channels-redis
 tika
-drf-yasg
\ No newline at end of file
+drf-yasg

From 1c2b46f5d930cf751250ab2dae5ea6d152fa5f2e Mon Sep 17 00:00:00 2001
From: dnicodemus-la
Date: Tue, 7 Nov 2023 16:40:18 -0500
Subject: [PATCH 46/88] fix small bug and enable tests

---
 swirl/processors/chatgpt_query.py | 21 +++++++++++----------
 swirl/tests/tests.py              | 14 +++++++-------
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/swirl/processors/chatgpt_query.py b/swirl/processors/chatgpt_query.py
index 9cbdefe71..a5db8b203 100644
--- a/swirl/processors/chatgpt_query.py
+++ b/swirl/processors/chatgpt_query.py
@@ -76,17 +76,18 @@ def set_do_filter_from_tags(self):
         if filter_tag_value == None or len(filter_tag_value) <= 0:
             self.do_filter = MODEL_DEFAULT_DO_FILTER

-        try:
-            if filter_tag_value.lower() == 'true':
-                self.do_filter = True
-            elif filter_tag_value.lower() == 'false':
-                self.do_filter = False
-            else:
-                logger.error(f"Error parsing filter tag {filter_tag_value} using default: {MODEL_DEFAULT_DO_FILTER}")
+        else:
+            try:
+                if filter_tag_value.lower() == 'true':
+                    self.do_filter = True
+                elif filter_tag_value.lower() == 'false':
+                    self.do_filter = False
+                else:
+                    logger.error(f"Error parsing filter tag {filter_tag_value} using default: {MODEL_DEFAULT_DO_FILTER}")
+                    self.do_filter = MODEL_DEFAULT_DO_FILTER
+            except Exception as x:
+                logger.error(f"Exception parsing filter tag {filter_tag_value} using default: {MODEL_DEFAULT_DO_FILTER}")
                 self.do_filter = MODEL_DEFAULT_DO_FILTER
-        except Exception as x:
-            logger.error(f"Exception parsing filter tag {filter_tag_value} using default: {MODEL_DEFAULT_DO_FILTER}")
-            self.do_filter = MODEL_DEFAULT_DO_FILTER

 def process(self):
     try:

diff --git a/swirl/tests/tests.py b/swirl/tests/tests.py
index 398bfbd7f..b07be0d8f 100644
--- a/swirl/tests/tests.py
+++ b/swirl/tests/tests.py
@@ -479,7 +479,7 @@ def test_aqp(aqp_test_cases, aqp_test_expected):
     assert actual == aqp_test_expected[i]
     i = i + 1

-@pytest.mark.skip(reason="tmp for cgpt")
+@pytest.mark.django_db
 def test_cgptqp_1():
     tc = 'gig economy'
     expected = 'gig economy'
@@ -510,7 +510,7 @@ def test_cgptqp_1():
         ],
         temperature=0)

-@pytest.mark.skip(reason="tmp for cgpt")
+@pytest.mark.django_db
 def test_cgptqp_2():
     tc = 'gig economy'
     expected = 'gig economy'
@@ -542,7 +542,7 @@ def test_cgptqp_2():
         ],
         temperature=0)

-@pytest.mark.skip(reason="tmp for cgpt")
+@pytest.mark.django_db
 def test_cgptqp_3():
     tc = 'gig economy'
     expected = 'gig economy'
@@ -574,7 +574,7 @@ def test_cgptqp_3():
         ],
         temperature=0)

-@pytest.mark.skip(reason="tmp for cgpt")
+@pytest.mark.django_db
 def test_cgptqp_4():
     tc = 'gig economy'
     expected = 'gig economy'
@@ -608,7 +608,7 @@ def test_cgptqp_4():
         temperature=0)
-@pytest.mark.skip(reason="tmp for cgpt")
+@pytest.mark.django_db
 def test_cgptqp_5():
     tc = 'gig economy'
     expected = 'Gig economy large scale economics'
@@ -644,7 +644,7 @@ def test_cgptqp_5():
         temperature=0)

-@pytest.mark.skip(reason="tmp for cgpt")
+@pytest.mark.django_db
 def test_cgptqp_6():
     tc = 'gig economy'
     expected = 'gig economy'
@@ -680,7 +680,7 @@ def test_cgptqp_6():
         temperature=0)

-@pytest.mark.skip(reason="tmp for cgpt")
+@pytest.mark.django_db
 def test_cgptqp_7():
     tc = 'gig economy'
     expected = 'gig economy'

From 2790be0b0cbf0cf4ffcf768445544d2b2d36f5c2 Mon Sep 17 00:00:00 2001
From: Harshil Khamar <73790584+Harshil0512@users.noreply.github.com>
Date: Wed, 8 Nov 2023 11:22:25 +0530
Subject: [PATCH 47/88] Changes Done

---
 SearchProviders/untested/duck_duck_go.json | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/SearchProviders/untested/duck_duck_go.json b/SearchProviders/untested/duck_duck_go.json
index 1fab358e3..7058e3356 100644
--- a/SearchProviders/untested/duck_duck_go.json
+++ b/SearchProviders/untested/duck_duck_go.json
@@ -3,17 +3,16 @@
     "active": false,
     "default": false,
     "connector": "RequestsGet",
-    "url": "https://api.duckduckgo.com/",
-    "query_template": "{url}?q={query_string}&format=json&pretty=1",
+    "url": "https://api.duckduckgo.com/?format=json&pretty=1",
+    "query_template": "{url}&q={query_string}",
     "query_processors": ["AdaptiveQueryProcessor"],
     "query_mappings": "",
     "result_processors": [
       "MappingResultProcessor",
-      "LenLimitingResultProcessor",
       "CosineRelevancyResultProcessor"
     ],
     "response_mappings": "RESULTS=RelatedTopics",
-    "result_mappings": "url=meta['developer']['url'],body=snippet,author=meta['developer']['name']",
+    "result_mappings": "title=FirstURL,body=Result,author=meta.developer.name",
     "results_per_query": 10,
     "tags": ["DuckDuckGo", "Search"]
 }

From f0ac15a01dcf56df6ec3f10ae58e9ca75314d9ba Mon Sep 17 00:00:00 2001
From: Erik Spears
Date: Thu, 9 Nov 2023 11:14:43 -0500
Subject: [PATCH 48/88] rev Dockerfile Python image to latest 3.11.x version

---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index 3122949c6..c708b2169 100755
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,4 @@
-FROM python:3.11.5-slim-bookworm
+FROM python:3.11.6-slim-bookworm

 # try to upgrade to a more recent vesion of openssl
 RUN apt-get update

From f887f204c21e2831f9b43f906c732012d93f878b Mon Sep 17 00:00:00 2001
From: Erik Spears
Date: Thu, 9 Nov 2023 15:44:09 -0500
Subject: [PATCH 49/88] add Trello SP + fresh db.dist for next release

---
 SearchProviders/atlassian.json | 35 +++++++++++++++++++++++++++++++++
 SearchProviders/preloaded.json | 35 +++++++++++++++++++++++++++++++++
 db.sqlite3.dist                | Bin 294912 -> 299008 bytes
 3 files changed, 70 insertions(+)

diff --git a/SearchProviders/atlassian.json b/SearchProviders/atlassian.json
index b65bcc218..dc34c6aba 100644
--- a/SearchProviders/atlassian.json
+++ b/SearchProviders/atlassian.json
@@ -50,5 +50,40 @@
     "Atlassian",
     "Internal"
   ]
+  },
+  {
+    "name": "Cards - Atlassian Trello",
+    "shared": true,
+    "active": false,
+    "default": false,
+    "authenticator": "",
+    "connector": "RequestsGet",
+    "url": "https://api.trello.com/1/search?modelTypes=cards&card_board=true&card_members=true&card_attachments=true&partial=true&card_list=true&card_fields=id,closed,dueComplete,dateLastActivity,desc,due,email,labels,name,start,url",
+    "query_template": "{url}&query={query_string}&key=&token=",
+    "post_query_template": {},
+    "http_request_headers": {
+      "Content-Type": 
"application/json" + }, + "page_fetch_config_json": {}, + "query_processors": [ + "AdaptiveQueryProcessor" + ], + "query_mappings": "", + "result_grouping_field": "", + "result_processors": [ + "MappingResultProcessor", + "CleanTextResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "RESULTS=cards", + "result_mappings": "title=name,body=desc,date_published=dateLastActivity,url=url,id,board.name,list.name,labels[*].color,members[*].fullName,attachments[*].name,email,start,due,dueComplete,closed,NO_PAYLOAD", + "results_per_query": 10, + "credentials": "", + "eval_credentials": "", + "tags": [ + "Trello", + "Atlassian", + "Internal" + ] } ] diff --git a/SearchProviders/preloaded.json b/SearchProviders/preloaded.json index 818b1633a..9c6dd380e 100644 --- a/SearchProviders/preloaded.json +++ b/SearchProviders/preloaded.json @@ -1158,5 +1158,40 @@ "Swirl", "Documentation" ] + }, + { + "name": "Cards - Atlassian Trello", + "shared": true, + "active": false, + "default": false, + "authenticator": "", + "connector": "RequestsGet", + "url": "https://api.trello.com/1/search?modelTypes=cards&card_board=true&card_members=true&card_attachments=true&partial=true&card_list=true&card_fields=id,closed,dueComplete,dateLastActivity,desc,due,email,labels,name,start,url", + "query_template": "{url}&query={query_string}&key=&token=", + "post_query_template": {}, + "http_request_headers": { + "Content-Type": "application/json" + }, + "page_fetch_config_json": {}, + "query_processors": [ + "AdaptiveQueryProcessor" + ], + "query_mappings": "", + "result_grouping_field": "", + "result_processors": [ + "MappingResultProcessor", + "CleanTextResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "RESULTS=cards", + "result_mappings": "title=name,body=desc,date_published=dateLastActivity,url=url,id,board.name,list.name,labels[*].color,members[*].fullName,attachments[*].name,email,start,due,dueComplete,closed,NO_PAYLOAD", + "results_per_query": 10, + "credentials": "", + "eval_credentials": "", + "tags": [ + "Trello", + "Atlassian", + "Internal" + ] } ] diff --git a/db.sqlite3.dist b/db.sqlite3.dist index f113b3b94e01e2f542cdbd20df32c114e3ad4eeb..1ab7c18b31276c46e229e29c2d0c6c91447647dc 100644 GIT binary patch delta 3926 zcmZveTWlOx8OOceW13WPwsvfNs|?9-sS}lVlS%hGd;Ov16OiHj0w9cTz7| zd)K?;qz;IrR4D?1)M_N=0ijTV`ogu1)Oi3wc_0$vQf}b|6+wB(0|-$eDggp<&W>x( z*z>Tf-Sa#D`7Y=CzBza9wBPw^`^&v8rKYB)$yd@RW2M)xc6YSNG=@hhK8m4%Fz1jt zpX(S3twKQra(>WpGQ7$WGo8JiXOqDvK*|W5A8u(vb52{b%emV*8dhP!sq@3md{70Y zG?|HeaFMA z6bb0i6y6L%TO&xha3d8qMpU_T=X}qSOpiBYdEltUTxk7QdwBo!v&>7M}AtiLaG>{Gj1acgiNYUjlK0>8GhT3tVXVPf@SeX@S* z+)$0LoU$jMhR>GH!d$LCd$YW_Fg17Nxux~uqB#L`AHR96dWKEY8#gP?&npKu9V5t~ zSVd`mrutGf_GYyie7Ebd#5-yZHBBb|lXySzUgEEbM~Qb6@7!;zK1ej@4|(s7$M;PF z3MF^{^?ZD&@yi=rkN>$VK9Z2`V=u?2n-lW>VERvcTR?nwY{WZpFaERD!yjae5ks9K zh9-uXj3G1zw-M~dEYRWQ+raG@4lrb@=EqSgO(dRJD%fXc=H07@K%d*W5Bxo75>ief zi4tqFVRE7ae82g~rb(<$LK4h&s(eDB`{fk)l=oT^Jm0)!3>!iqldCS>9-mK4_eT^+l$g_ zBCysDda4X&0>#4&!Kg~TL8ysEL?ijc^asTk^I%uV4o2!e5K-35&;(=et+U|G!09pr z3KOYL-;wzS!H{~dPJzz{6x5IuOBC64nqcl-JP&>nI3cd)tl8+`?L)yQs(V z66gxZn4wQJO6C*BJ!MUT(aoVM3`_(^5y|$6*R}v21d2cyY9I;{Q8qqdo_br128x;+ zQr$jrWZpC(jq&R(ErC0+tvSjKbvj(~oi)%P0($MA0PhD%fE!#2EF<%*h?)r=DT7~! 
z6pX8C7EvZN!KMdS!0*AVQ9jjE}CxdAD;xR;PF;))%rb#$v6lb02HlJyEC?>*=v{&Mp_~ z^-^IaJzulR;gJyckmg|@2;Hyz3xl~47-181A7Y%zphl* z7E~}xRx!K7P31*aeKIPlM$(LihdT6)Uk$%69los6Uar(K8=2G#*VfAA^X}l2pqu+K n`f=%)gPGY#*7@lxGr4D`rgI;8{L%qGPS=p?_V=x4!NLCnEef-G delta 3310 zcmZveTWl3Y7{_}!1Ns7ow&&8@>4lV*(y+|^QYhFWWhq5ysUR^JE*efNmZE?y7}J=b z(MW=s#7#5?35FM=2qesdiI)dSOhk;g7ZV>a1`~Y{jXY>foIPuLc6#=8e*53=`~LII zH#7Hk-P~N=x#q>CnwlE_wf2pv(v|%U*;>(#SRWF7h<>sH1m$?w;u^F|*B0{nOs>aK355yQALo2WB^X9zymMYf^9xjj3aqE{ z%2go<<>(5l+Fp+JZN95Mo(mU55x+xE=bLnQq3AM-8TZ@E>{y}bz)5Vg^}=!_CJk!S3|4IIHS~WDYLg0GQ>Fdo6GEu%b=n}LBFZYP-AO63n&Fg zQtxlv5|0E0rwIEEWyLp*P8Luw2$f$bvtrW%MmQFJzRV6cwZ`3r!fvGO&sld@>brI) z-)XCJGT}lHtiNkowLIoICadVlIW=(Y(@GWdstY&onD{6(UUatp$Ie98#fc zTd4CbB~)^~yG1&yL{WzH`4-`>GD(QO)3V-OMVw(xi`XdwD~R;Tq8kiLDvAEE=$3}5 zfK>mx@LPls(}S&UX-Ggu^s5V3vE)L3-`eLS205luceLSR++AxJB{03Xrm%A0z~fI% zOz8`4xrz>ZN+}|^e!Z=|Qc$?c6w)`^Vu#8uO2s+3+TN8}(p^ZWNh-Kvw`NSyoxZM?uUYLGyI(2<|5%?gxBy-!&p&CE)Pv&0c zZsxbloy_fPwbM5<=I6EPw76 z;e7gwW4)0-vJX-Z5@dbD5R)|st$_=OWWll_P%tC4;B*SlL*hv(1VYuah+wj*bl~`w zo#Vm&ZqO3s>cQ`on8XuQNGy}YA)&&+EV#8OuHeuTnxqJcqQRR*@SM3+05gt^a=}Sr zmLZ`ml6}?$-d(ghRvf_$DwY(@5`=<*vmk5UFM?vaqN1J@_H-twB7%d|6QIdtIzVH( zQW^CGp_s$!SP>D_?CJz#vFxbF6k#mEy@&{Ns|)Oixj;S22v#B~geBGs4nJ5vcz(ig|X#OWQbA ztr`&p%|+r=&OEG?L@G%U5<*RpgHujc_KFcKp>pPhM6kdr)&^g#1Hk;HfO53Zev%w5 zBm{@r4s)s>?2O5{-A{;nCO{o2 zw$|IWt1^;%Az{I%>VIiqr7O(dA#g8M>2&TPBCUH#`jDt#=@3|LF8bgPXZ!4&6+ChK zLL!m*WEfm`6w%Ws!nTOLlM_Zn zL}=Lyvu_MQM}?S>JW&-A8N5*hkD1(V>wD!`2@fK{l4fm0gbXh41K3=81Uwm&aXVP- zrJWEgBAi9CsYgNHkqKhoBME+UJ9D5NJH|#+pl5#WNxQGizD29ui-@T`2yVpsNZ31) zGnC+VM2N91*yGqHnSl2BRJA?AZm7NJqrqA#fw@?_3A=S%$b?A|5oPKoZGXmnW`e7v zgDC9K;fioWj)1Qn1tr`@NlGjv1Q|F5egKs-Aic0LX~G$bNK{H-?}!O=^%<}(W}EaF z Date: Thu, 9 Nov 2023 16:16:27 -0500 Subject: [PATCH 50/88] cherry-pick only the readme updates from main --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index a3d32e04c..47522fef8 100644 --- a/README.md +++ b/README.md @@ -89,6 +89,8 @@ export MSAL_HOST=localhost export OPENAI_API_KEY=β€˜β€™ ``` +:key: Check out [OpenAI's YouTube video](https://youtu.be/nafDyRsVnXU?si=YpvyaRvhX65vtBrb) if you don't have an OpenAI API Key. 
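The README step above treats the OpenAI key as optional. A minimal sketch of reading it defensively at startup (plain `os.environ` here, with an empty-string default mirroring the `export OPENAI_API_KEY=''` step; this is not the actual Swirl settings code):

```
# Minimal sketch: read an optional OpenAI API key without failing startup.
import os

OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY', '')

if not OPENAI_API_KEY:
    # An absent key should disable AI summaries rather than crash the server.
    print('OPENAI_API_KEY not set; ChatGPT-backed features stay disabled')
```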
+ * In MacOS or Linux, run the following command from the Console: ``` From baaea372a8a00015307001e34d9a1a046fe73909 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Mon, 13 Nov 2023 15:32:13 -0500 Subject: [PATCH 51/88] add Asana SP JSON + clear db.dist for next release --- SearchProviders/asana.json | 35 +++++++++++++++++++++++++++++++++ SearchProviders/preloaded.json | 35 +++++++++++++++++++++++++++++++++ db.sqlite3.dist | Bin 299008 -> 299008 bytes 3 files changed, 70 insertions(+) create mode 100644 SearchProviders/asana.json diff --git a/SearchProviders/asana.json b/SearchProviders/asana.json new file mode 100644 index 000000000..0292abf96 --- /dev/null +++ b/SearchProviders/asana.json @@ -0,0 +1,35 @@ +{ + "name": "Tasks - Asana.com", + "shared": true, + "active": false, + "default": false, + "authenticator": "", + "connector": "RequestsGet", + "url": "https://app.asana.com/api/1.0/workspaces//tasks/search?opt_fields=gid,resource_type,assignee_status,completed,completed_at,completed_by.name,created_at,created_by.name,due_on,likes.user.name,modified_at,name,notes,num_subtasks,start_on,assignee.name,assignee_section.name,followers.name,parent.name,permalink_url,projects.name,tags.name,workspace.name", + "query_template": "{url}&text={query_string}", + "post_query_template": {}, + "http_request_headers": { + "Content-Type": "application/json" + }, + "page_fetch_config_json": {}, + "query_processors": [ + "AdaptiveQueryProcessor" + ], + "query_mappings": "", + "result_grouping_field": "", + "result_processors": [ + "MappingResultProcessor", + "CleanTextResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "RESULTS=data", + "result_mappings": "title=name,body=notes,date_published=created_at,author=created_by.name,url=permalink_url,gid,resource_type,num_subtasks,start_on,due_on,assignee.name,assignee_status,assignee_section.name,completed,completed_at,completed_by.name,modified_at,workspace.name,projects[*].name,parent.name,followers[*].name,tags[*].name,likes[*].user.name,NO_PAYLOAD", + "results_per_query": 10, + "credentials": "bearer=", + "eval_credentials": "", + "tags": [ + "Asana", + "Tasks", + "Internal" + ] +} \ No newline at end of file diff --git a/SearchProviders/preloaded.json b/SearchProviders/preloaded.json index 9c6dd380e..3780de685 100644 --- a/SearchProviders/preloaded.json +++ b/SearchProviders/preloaded.json @@ -1193,5 +1193,40 @@ "Atlassian", "Internal" ] + }, + { + "name": "Tasks - Asana.com", + "shared": true, + "active": false, + "default": false, + "authenticator": "", + "connector": "RequestsGet", + "url": "https://app.asana.com/api/1.0/workspaces//tasks/search?opt_fields=gid,resource_type,assignee_status,completed,completed_at,completed_by.name,created_at,created_by.name,due_on,likes.user.name,modified_at,name,notes,num_subtasks,start_on,assignee.name,assignee_section.name,followers.name,parent.name,permalink_url,projects.name,tags.name,workspace.name", + "query_template": "{url}&text={query_string}", + "post_query_template": {}, + "http_request_headers": { + "Content-Type": "application/json" + }, + "page_fetch_config_json": {}, + "query_processors": [ + "AdaptiveQueryProcessor" + ], + "query_mappings": "", + "result_grouping_field": "", + "result_processors": [ + "MappingResultProcessor", + "CleanTextResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "RESULTS=data", + "result_mappings": 
"title=name,body=notes,date_published=created_at,author=created_by.name,url=permalink_url,gid,resource_type,num_subtasks,start_on,due_on,assignee.name,assignee_status,assignee_section.name,completed,completed_at,completed_by.name,modified_at,workspace.name,projects[*].name,parent.name,followers[*].name,tags[*].name,likes[*].user.name,NO_PAYLOAD", + "results_per_query": 10, + "credentials": "bearer=", + "eval_credentials": "", + "tags": [ + "Asana", + "Tasks", + "Internal" + ] } ] diff --git a/db.sqlite3.dist b/db.sqlite3.dist index 1ab7c18b31276c46e229e29c2d0c6c91447647dc..131f24c823bb1bd99b23eefecb40517b42b834cd 100644 GIT binary patch delta 4331 zcmZvfTWl298OK@gp)pkfE!bY)7GDCUChOUAnR5dSUEIK8E(J_T6BL=TJ%NSw?y|GS zacJUH4Q(R@DQKilr6>ugD5|QeQ4&@2P^t8#YG0ZXwe%rU-zw#yQmd%VLmwh~X2)yq znH^b{X3lT^=X~G)`_A$4Yj=0Nc6Y~H1Bt752B#8NKfILg+JVS0hWoMHkBLDbG)>ce zqw7g+7z>23aDUr%NFU}>Q0{K)KHl3p3FZPy^ZeN6O=!kVrF-04-QSGp7!k~U(0weT z!$j$Juj|a7s18BI+;Wf5bxa~i&WA?sj0*bz&qhzSe}88uU%JJXwqK$B8g7;M0I+8|^mEDSW@ z=TQhv3R$7Q0a0H@3k69D8Fw%B9o1GtLJCF;y$#J@`t(qc5R}kDx&br&QOL1`g`Nhy z+@H~$MuK8WC%cu?d)zNnW$zEA^l${`47-o|_v*u3aw^seQ%KI!^jkpxt|Z}g+>SpWV&Vs z>0d+D_QGAx;OMZj*8NQ;5+F1sh3;1wqO}%+m{T{uOKNjKWe7&@o4fSfp=40y{8A7{SyH?r9D1*t(o6>vo*1}@3?A{yFH#!t1qR9Qp(2%~oP#H7#4?}wW)Y=e= zkA}y#e{DRSOp=86VJWfmoE`?dHcL!=G*243b+CJ)-BP8|>= zeAop(>1dHKLsC`j7)eM7_1?~a@A>bi!LqIsh+&4?mtsahgc<67-w%G#@zh3w%wU`& z8e>dIxR5` z%x-YDRl*GkscEXCw7q@-VaR_t22SY`&Lku2}H7`jf6pvN+@QZgaij| z)bDK{0R#T+NpP@LD-8h&i>qx&sF%MDdcAk1zz%;`9vp17UmA>Yruw_BFG7Mvu&+<} zpH6{PYuighl}!Cji_c_2gF4F@20`8bqT^O_Uw{IKm_*hodj>RN`#@V093}<(jW{9|H5hT$7cBn z+z+8)staB8q}w1u_|xaWk>pxehJct9qexe75HB?cx;=GtO#9sz0BP1O`~V|JqxNkO zbzFV$0>J+J-vw879pqH)cir|-7Z4^wUHKyDZtB!6mP$hsNl;9Gh6GDrttY_RVGvA+ z3@j;+w&ey1kGMWRJEu-mO^2u?YJY1j8R`OJ!eAfuc}lm>f7%BB(8WqcNKAgn2#85N z^%tYdFJ1w!HVYnRFNBada;9t$g8iLW!GCoLr`Y7Nl7I-}lM?t;m#7X^*S7Z5prMFB z*oS5C8CW|Q!kmQ(WxN{!F~g0fnxyD|-Uj#&;F0Wq=PdjmYsC6uW{EY|#h zsJ>E?F@Jmo{7~1Ksym`*a+p9sBoF$k*ZsMH=jxA*@yw>& zmVCambL*40oWrU7);(LlKY8nLV(Wq9&!o1Tx14Lv@WgQ5vC5V)Us-Io4;$)#DXa?E z)-w&F{sTK_UthAFx^v8~U#Zut&i-7^s#Xolu~x%L)oU@Qnk2VesaY}>O= z-KsA+SrvM9O(3Zo>Etyr9)uAJCg-t-oH}d>zf;Scz3=H23D=5`jtv; zvT07!5jF8-Rg0*-6!AW+d8yga;zn4tCY@c??1?XCOf6e0qg%4>+{Jw_w41!{f0GVp z18rLR@IRbVLhfUkqEE8w)=MP;RZ9I==hesPb$$R&CV0w?<1bOY4k?I-{ Rz3+tkHz3kicZdfa{|8=;KCS=& delta 3407 zcmZveOKep|7{|H2gKn^-(DwFmZy#7_%j0z34-lnP;TBtT-Y)j#Ca&!-)UJT44a1ETZ^SS{m~+VV`P#>QGZaK1{dw(^88b&r z>gu}w2Ja9^8G$2fmM%fVx~5RC&((EhbXahzZ`N)1bWkbD)JVre9j*6u&~ixiWWCJj zxQ%0ImDLSa{QX$Jx;7mUR9JCTZr3O3n{;QPn1PTJS#GPq=|Zu_Q%XUwe!sBEiyu-! 
z2-3e7Hu)z}LO3ax>`9G!xS^DR2oY2+R^WSCNCd{J++2Zo7eRy=FE>>nYApGoV8$5L z`y03U-Vn!}m3C6P?u zFOh}Ta%W?Xpzdu^{u#Cp?AzI{VHEW$nf>HHOn9AD&+Gd7hgM`TY2ND zo`Ee}$4(B4kuiQ`a{9#F&Y`~P(V704GlLx``Q+A9{d?i_6FXpU@64Xr;|IqEhSwb( zpF4C=_QBrgW{;fM&UVq$vs3!JsrH4`5EM~tFZQesp^ty-FW+Wm=VJI=gWCKDQvINJEZ37Kvpaz`Dku*e-U^{*RnIj7K zjUPC*efLPbzY`SWx|QIM^=UPvoI(;(JA#pcwcu7&*MbDw+$)0FVu>U1zlT5}{WzPy$%n?S+o^@ci?}aj|Lg^!sBZ!z=8^G)NlouLF zJ7A$&B}5?e3IhM+)-IHnMqCiPT|?P+#9SeMSTtgUySZG{4+%lslsK64!?Mer+pZ2s z98n~;lAieXCIHOe3P@j!x$_tjJ0h?Xw;S6JyV5!q2}W&R7wZzjNnE`Rbeoo5@I*T8 zEaFHZ9Mm~Ngws81?(~5gUq|i2m4QXTB1;tH#_G1D2ZTk0N-P4Sl@RH6$6E728Qe?v z5{odh4}n0FCj?{j#SZYPFM&cLJO7I+z#>a9q~^*1IG>i#NKvd%a10&6+>Gu7-=rs( z+Z8J9$S?MikpBmM-yYDDt`v{3B0>Z_B}9t&-4fVm>P7)cXPiff*#HBbm=MO}Pxb+9 zuI>fLe4WjLq`@#9VJy*2z5wddIu^O|hI9#G+*p4Sq;{EzFcBOD@}3alR^ZcSbPSwL zdlHe|n}P&-DIpBGo^OoTkJ2!p+{aI_q$HA-Vdh^JZjw>E2 zo+ZNe@5*D~FJHoyz$#b(ONi|%ava?8C5+pS7a=c>U}@ka_!*>@ny`N}T-XQkqTWr2 zB(6PPHrHpsMn8%6j}Y@<5*-oJG@J%E)2R~?v(vx>la&w=7fypt(>({?@I4VhSg0{X yV(D_1eX~p?F{!i^}UtnBKQ+#Qy=a-};dN From 75921349b456a99cd1717553dae6b71f74c31ab5 Mon Sep 17 00:00:00 2001 From: dnicodemus-la Date: Fri, 17 Nov 2023 09:35:53 -0500 Subject: [PATCH 52/88] patched from enterprise --- swirl/utils.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/swirl/utils.py b/swirl/utils.py index 4ce1c9693..88290a874 100644 --- a/swirl/utils.py +++ b/swirl/utils.py @@ -40,13 +40,22 @@ def is_running_celery_redis(): for url in celery_urls: if not (purl := safe_urlparse(url)): continue - if not (purl.scheme or purl.scheme.lower() == 'redis'): + if not (purl.scheme or purl.scheme.lower() == 'redis' or purl.scheme.lower() == 'rediss'): continue parsed_redis_urls.append(purl) for url in parsed_redis_urls: try: - r = redis.StrictRedis(host=url.hostname, port=url.port, db=0, decode_responses=True) + password = url.password + hostname = url.hostname + port = url.port + db = int(url.path.lstrip('/')) if url.path else 0 # Extracting DB index, default is 0 + scheme = url.scheme + use_ssl = scheme.lower() == 'rediss' # Enable SSL if the scheme is 'rediss' + r = redis.StrictRedis(host=hostname, port=port, db=db, password=password, + ssl=use_ssl, + ssl_cert_reqs='required' if use_ssl else None, + decode_responses=True) response = r.ping() if response: print(f"{url} checked.") From 271a5914b2232d1cadd415f8b178eb7177883658 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Fri, 17 Nov 2023 09:51:57 -0500 Subject: [PATCH 53/88] rename new swagger.json file --- Swirl-Metasearch.swagger.json => Swirl.swagger.json | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename Swirl-Metasearch.swagger.json => Swirl.swagger.json (100%) diff --git a/Swirl-Metasearch.swagger.json b/Swirl.swagger.json similarity index 100% rename from Swirl-Metasearch.swagger.json rename to Swirl.swagger.json From acb5fc92a7dc8dd174862855abed1f00a34ae37d Mon Sep 17 00:00:00 2001 From: Harshil Khamar <73790584+Harshil0512@users.noreply.github.com> Date: Mon, 20 Nov 2023 11:57:47 +0530 Subject: [PATCH 54/88] Result Mapping And Response Mapping Fixed --- SearchProviders/untested/yelp.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SearchProviders/untested/yelp.json b/SearchProviders/untested/yelp.json index 1db708de1..fd05fe7e7 100644 --- a/SearchProviders/untested/yelp.json +++ b/SearchProviders/untested/yelp.json @@ -11,9 +11,9 @@ "MappingResultProcessor", "CosineRelevancyResultProcessor" ], - "response_mappings": "FOUND=total,RESULTS=results", - "result_mappings": 
"title=term,body=businesses", + "response_mappings": "FOUND=total,RESULTS=businesses", + "result_mappings": "title=name,body=businesses[*],url=url", "results_per_query": 10, "credentials": "bearer=", "tags": ["Yelp", "Search"] - } \ No newline at end of file + } From 581e81e583a91437ff53780ccb1342ff70a0e929 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Wed, 22 Nov 2023 12:09:03 +0100 Subject: [PATCH 55/88] upgrade openai --- requirements.txt | 2 +- swirl/connectors/chatgpt.py | 13 +++++++------ swirl/processors/chatgpt_query.py | 11 ++++++----- swirl/processors/rag.py | 9 +++++---- 4 files changed, 19 insertions(+), 16 deletions(-) diff --git a/requirements.txt b/requirements.txt index bc23d2702..6adb50f0a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ nltk bs4 google-cloud-bigquery opensearch-py -openai==0.28.1 +openai==1.3.4 msal PyJWT pyahocorasick diff --git a/swirl/connectors/chatgpt.py b/swirl/connectors/chatgpt.py index 7dba33a13..4208e3019 100644 --- a/swirl/connectors/chatgpt.py +++ b/swirl/connectors/chatgpt.py @@ -24,7 +24,8 @@ from datetime import datetime -import openai +from openai import OpenAI + MODEL_3 = "gpt-3.5-turbo" MODEL_4 = "gpt-4" @@ -48,12 +49,12 @@ def __init__(self, provider_id, search_id, update, request_id=''): def execute_search(self, session=None): logger.debug(f"{self}: execute_search()") - + client = None if self.provider.credentials: - openai.api_key = self.provider.credentials + client = OpenAI(api_key=self.provider.credentials) else: if getattr(settings, 'OPENAI_API_KEY', None): - openai.api_key = settings.OPENAI_API_KEY + client = OpenAI(api_key=settings.OPENAI_API_KEY) else: self.status = "ERR_NO_CREDENTIALS" return @@ -79,13 +80,13 @@ def execute_search(self, session=None): return logger.info(f'CGPT completion system guide:{self.system_guide} query to provider : {self.query_to_provider}') self.query_to_provider = prompted_query - completions = openai.ChatCompletion.create( + completions = client.chat.completions.create( model=MODEL, messages=[ {"role": "system", "content": self.system_guide}, {"role": "user", "content": self.query_to_provider}, ], - temperature=0, + temperature=0 ) message = completions['choices'][0]['message']['content'] # FROM API Doc diff --git a/swirl/processors/chatgpt_query.py b/swirl/processors/chatgpt_query.py index a5db8b203..3af95a6bf 100644 --- a/swirl/processors/chatgpt_query.py +++ b/swirl/processors/chatgpt_query.py @@ -12,7 +12,8 @@ from swirl.processors.processor import * from swirl.processors.utils import get_tag -import openai +from openai import OpenAI + MODEL_3 = "gpt-3.5-turbo" MODEL_4 = "gpt-4" @@ -95,20 +96,20 @@ def process(self): self.set_prompt_from_tags() self.set_do_filter_from_tags() logger.info(f"{self.type} model {MODEL} system guide {self.system_guide} prompt {self.prompt} Do Filter {self.do_filter}") - + client = None if getattr(settings, 'OPENAI_API_KEY', None): - openai.api_key = settings.OPENAI_API_KEY + client = OpenAI(api_key=settings.OPENAI_API_KEY) else: self.warning('API key not available') return self.query_string - response = openai.ChatCompletion.create( + response = client.chat.completions.create( model=MODEL, messages=[ {"role": "system", "content": self.system_guide}, {"role": "user", "content": self.prompt.format(query_string=self.query_string) }, ], - temperature=0, + temperature=0 ) message = response['choices'][0]['message']['content'] # FROM API Doc logger.info(f"ChatGPT Response: {message}") diff --git a/swirl/processors/rag.py 
b/swirl/processors/rag.py index a92ec5baf..edfcea76f 100644 --- a/swirl/processors/rag.py +++ b/swirl/processors/rag.py @@ -12,7 +12,7 @@ from datetime import datetime -import openai +from openai import OpenAI from celery import group import threading @@ -214,13 +214,13 @@ def background_process(self): return 0 try: - completions_new = openai.ChatCompletion.create( + completions_new = self.client.chat.completions.create( model=MODEL, messages=[ {"role": "system", "content": rag_prompt.get_role_system_guide_text()}, {"role": "user", "content": new_prompt_text}, ], - temperature=0, + temperature=0 ) model_response = completions_new['choices'][0]['message']['content'] # FROM API Doc logger.info(f'RAG: fetch_prompt_errors follow:') @@ -260,8 +260,9 @@ def process(self, should_return=False): logger.info('RUN RAG') # to do: remove foo:etc + self.client = None if getattr(settings, 'OPENAI_API_KEY', None): - openai.api_key = settings.OPENAI_API_KEY + self.client = OpenAI(api_key=settings.OPENAI_API_KEY) else: logger.warning("RAG OPENAI_API_KEY unset!") return 0 From 6f1b0c7a9a4a5e8def17009bbf029da4bf1c2194 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Wed, 22 Nov 2023 14:10:24 +0100 Subject: [PATCH 56/88] modify openai tests for new version --- swirl/tests/tests.py | 167 +++++++++++++++---------------------------- 1 file changed, 58 insertions(+), 109 deletions(-) diff --git a/swirl/tests/tests.py b/swirl/tests/tests.py index b07be0d8f..96efed190 100644 --- a/swirl/tests/tests.py +++ b/swirl/tests/tests.py @@ -19,6 +19,7 @@ from swirl.processors.dedupe import DedupeByFieldResultProcessor from swirl.utils import select_providers, http_auth_parse + logger = logging.getLogger(__name__) ###################################################################### @@ -484,57 +485,45 @@ def test_cgptqp_1(): tc = 'gig economy' expected = 'gig economy' - settings.OPENAI_API_KEY = "aFakeKey" - with mock.patch('openai.ChatCompletion.create') as mock_create: - mock_create.return_value = { - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Gig economy large scale economics" - }, - "finish_reason": "stop" - } - ], - } - cgptqp = ChatGPTQueryProcessor(tc, + with mock.patch('openai.OpenAI') as mock_openai: + client_instance = mock.MagicMock() + mock_openai.return_value = client_instance + mock_create = mock.MagicMock() + mock_create.return_value.choices[0].message.content = "Gig economy large scale economics" + client_instance.chat.completions.create = mock_create + cgptqp = ChatGPTQueryProcessor( + tc, '', ["PROMPT:Write a more precise query of similar length to this : {query_string}",] ) - actual = cgptqp.process() + actual = cgptqp.process(client=client_instance) assert actual == expected - mock_create.assert_called_once_with(model=MODEL, messages=[ + mock_create.assert_called_once_with( + model=MODEL, + messages=[ {"role": "system", "content": "You are helping a user formulate better queries"}, - {"role": "user", "content": "Write a more precise query of similar length to this : gig economy"} + {"role": "user", "content": "Write a more precise query of similar length to this : gig economy"} ], - temperature=0) + temperature=0 + ) @pytest.mark.django_db def test_cgptqp_2(): tc = 'gig economy' expected = 'gig economy' - settings.OPENAI_API_KEY = "aFakeKey" - with mock.patch('openai.ChatCompletion.create') as mock_create: - mock_create.return_value = { - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Gig economy large scale economics" - }, - 
"finish_reason": "stop" - } - ], - } + with mock.patch('openai.OpenAI') as mock_openai: + client_instance = mock.MagicMock() + mock_openai.return_value = client_instance + mock_create = mock.MagicMock() + mock_create.return_value.choices[0].message.content = "Gig economy large scale economics" + client_instance.chat.completions.create = mock_create cgptqp = ChatGPTQueryProcessor(tc, '', ["PROMPT:Write a more precise query of similar length to this : {query_string}", "CHAT_QUERY_REWRITE_GUIDE:You are a malevolent dictator"] ) - actual = cgptqp.process() + actual = cgptqp.process(client=client_instance) assert actual == expected mock_create.assert_called_once_with(model=MODEL, messages=[ {"role": "system", "content": "You are a malevolent dictator"}, @@ -547,26 +536,18 @@ def test_cgptqp_3(): tc = 'gig economy' expected = 'gig economy' - settings.OPENAI_API_KEY = "aFakeKey" - with mock.patch('openai.ChatCompletion.create') as mock_create: - mock_create.return_value = { - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Gig economy large scale economics" - }, - "finish_reason": "stop" - } - ], - } + with mock.patch('openai.OpenAI') as mock_openai: + client_instance = mock.MagicMock() + mock_openai.return_value = client_instance + mock_create = mock.MagicMock() + mock_create.return_value.choices[0].message.content = "Gig economy large scale economics" + client_instance.chat.completions.create = mock_create cgptqp = ChatGPTQueryProcessor(tc, '', ["CHAT_QUERY_REWRITE_PROMPT:Write a more precise query of similar length to this : {query_string}", "CHAT_QUERY_REWRITE_GUIDE:You are a malevolent dictator"] ) - actual = cgptqp.process() + actual = cgptqp.process(client=client_instance) assert actual == expected mock_create.assert_called_once_with(model=MODEL, messages=[ {"role": "system", "content": "You are a malevolent dictator"}, @@ -579,27 +560,19 @@ def test_cgptqp_4(): tc = 'gig economy' expected = 'gig economy' - settings.OPENAI_API_KEY = "aFakeKey" - with mock.patch('openai.ChatCompletion.create') as mock_create: - mock_create.return_value = { - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Gig economy large scale economics" - }, - "finish_reason": "stop" - } - ], - } + with mock.patch('openai.OpenAI') as mock_openai: + client_instance = mock.MagicMock() + mock_openai.return_value = client_instance + mock_create = mock.MagicMock() + mock_create.return_value.choices[0].message.content = "Gig economy large scale economics" + client_instance.chat.completions.create = mock_create cgptqp = ChatGPTQueryProcessor(tc, '', ["PROMPT:This should be used: {query_string}", "CHAT_QUERY_REWRITE_PROMPT:Write a more precise query of similar length to this : {query_string}", "CHAT_QUERY_REWRITE_GUIDE:You are a malevolent dictator"] ) - actual = cgptqp.process() + actual = cgptqp.process(client=client_instance) assert actual == expected mock_create.assert_called_once_with(model=MODEL, messages=[ {"role": "system", "content": "You are a malevolent dictator"}, @@ -613,20 +586,12 @@ def test_cgptqp_5(): tc = 'gig economy' expected = 'Gig economy large scale economics' - settings.OPENAI_API_KEY = "aFakeKey" - with mock.patch('openai.ChatCompletion.create') as mock_create: - mock_create.return_value = { - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Gig economy large scale economics" - }, - "finish_reason": "stop" - } - ], - } + with mock.patch('openai.OpenAI') as mock_openai: + client_instance = 
mock.MagicMock() + mock_openai.return_value = client_instance + mock_create = mock.MagicMock() + mock_create.return_value.choices[0].message.content = "Gig economy large scale economics" + client_instance.chat.completions.create = mock_create cgptqp = ChatGPTQueryProcessor(tc, '', ["PROMPT:This should be used: {query_string}", @@ -634,7 +599,7 @@ def test_cgptqp_5(): "CHAT_QUERY_REWRITE_GUIDE:You are a malevolent dictator", "CHAT_QUERY_DO_FILTER:False"] ) - actual = cgptqp.process() + actual = cgptqp.process(client=client_instance) assert actual == expected assert not cgptqp.do_filter mock_create.assert_called_once_with(model=MODEL, messages=[ @@ -649,20 +614,12 @@ def test_cgptqp_6(): tc = 'gig economy' expected = 'gig economy' - settings.OPENAI_API_KEY = "aFakeKey" - with mock.patch('openai.ChatCompletion.create') as mock_create: - mock_create.return_value = { - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Gig economy large scale economics" - }, - "finish_reason": "stop" - } - ], - } + with mock.patch('openai.OpenAI') as mock_openai: + client_instance = mock.MagicMock() + mock_openai.return_value = client_instance + mock_create = mock.MagicMock() + mock_create.return_value.choices[0].message.content = "Gig economy large scale economics" + client_instance.chat.completions.create = mock_create cgptqp = ChatGPTQueryProcessor(tc, '', ["PROMPT:This should be used: {query_string}", @@ -670,7 +627,7 @@ def test_cgptqp_6(): "CHAT_QUERY_REWRITE_GUIDE:You are a malevolent dictator", "CHAT_QUERY_DO_FILTER:True"] ) - actual = cgptqp.process() + actual = cgptqp.process(client=client_instance) assert actual == expected assert cgptqp.do_filter mock_create.assert_called_once_with(model=MODEL, messages=[ @@ -685,20 +642,12 @@ def test_cgptqp_7(): tc = 'gig economy' expected = 'gig economy' - settings.OPENAI_API_KEY = "aFakeKey" - with mock.patch('openai.ChatCompletion.create') as mock_create: - mock_create.return_value = { - "choices": [ - { - "index": 0, - "message": { - "role": "assistant", - "content": "Gig economy large scale economics" - }, - "finish_reason": "stop" - } - ], - } + with mock.patch('openai.OpenAI') as mock_openai: + client_instance = mock.MagicMock() + mock_openai.return_value = client_instance + mock_create = mock.MagicMock() + mock_create.return_value.choices[0].message.content = "Gig economy large scale economics" + client_instance.chat.completions.create = mock_create cgptqp = ChatGPTQueryProcessor(tc, '', ["PROMPT:This should be used: {query_string}", @@ -706,7 +655,7 @@ def test_cgptqp_7(): "CHAT_QUERY_REWRITE_GUIDE:You are a malevolent dictator", "CHAT_QUERY_DO_FILTER:xxx"] ) - actual = cgptqp.process() + actual = cgptqp.process(client=client_instance) assert actual == expected assert cgptqp.do_filter == MODEL_DEFAULT_DO_FILTER mock_create.assert_called_once_with(model=MODEL, messages=[ From 2c8b9102a41c4323d486ae24dedfe56ef4d2e3d2 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Wed, 22 Nov 2023 14:10:46 +0100 Subject: [PATCH 57/88] new opanai return values structure --- swirl/connectors/chatgpt.py | 3 +-- swirl/processors/chatgpt_query.py | 2 +- swirl/processors/rag.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/swirl/connectors/chatgpt.py b/swirl/connectors/chatgpt.py index 4208e3019..35fc38de6 100644 --- a/swirl/connectors/chatgpt.py +++ b/swirl/connectors/chatgpt.py @@ -88,8 +88,7 @@ def execute_search(self, session=None): ], temperature=0 ) - message = completions['choices'][0]['message']['content'] # 
FROM API Doc - + message = completions.choices[0].message.content self.found = 1 self.retrieved = 1 self.response = message.replace("\n\n", "") diff --git a/swirl/processors/chatgpt_query.py b/swirl/processors/chatgpt_query.py index 3af95a6bf..1399ca3aa 100644 --- a/swirl/processors/chatgpt_query.py +++ b/swirl/processors/chatgpt_query.py @@ -111,7 +111,7 @@ def process(self): ], temperature=0 ) - message = response['choices'][0]['message']['content'] # FROM API Doc + message = response.choices[0].message.content logger.info(f"ChatGPT Response: {message}") if not self.do_filter: diff --git a/swirl/processors/rag.py b/swirl/processors/rag.py index edfcea76f..c2f936f9b 100644 --- a/swirl/processors/rag.py +++ b/swirl/processors/rag.py @@ -222,7 +222,7 @@ def background_process(self): ], temperature=0 ) - model_response = completions_new['choices'][0]['message']['content'] # FROM API Doc + model_response = completions_new.choices[0].message.content logger.info(f'RAG: fetch_prompt_errors follow:') for (k,v) in fetch_prompt_errors.items(): logger.info(f'RAG:\t url:{k} problem:{v}') From fe37405ec1203cb4845bba810be26dd7fb37b350 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Wed, 22 Nov 2023 14:11:24 +0100 Subject: [PATCH 58/88] add client argument for pytest --- swirl/processors/chatgpt_query.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/swirl/processors/chatgpt_query.py b/swirl/processors/chatgpt_query.py index 1399ca3aa..a5bb658f8 100644 --- a/swirl/processors/chatgpt_query.py +++ b/swirl/processors/chatgpt_query.py @@ -90,18 +90,18 @@ def set_do_filter_from_tags(self): logger.error(f"Exception parsing filter tag {filter_tag_value} using default: {MODEL_DEFAULT_DO_FILTER}") self.do_filter = MODEL_DEFAULT_DO_FILTER - def process(self): + def process(self, client=None): try: self.set_guide_from_tags() self.set_prompt_from_tags() self.set_do_filter_from_tags() logger.info(f"{self.type} model {MODEL} system guide {self.system_guide} prompt {self.prompt} Do Filter {self.do_filter}") - client = None - if getattr(settings, 'OPENAI_API_KEY', None): - client = OpenAI(api_key=settings.OPENAI_API_KEY) - else: - self.warning('API key not available') - return self.query_string + if client is None: + if getattr(settings, 'OPENAI_API_KEY', None): + client = OpenAI(api_key=settings.OPENAI_API_KEY) + else: + self.warning('API key not available') + return self.query_string response = client.chat.completions.create( model=MODEL, From e6a08b96db22cd2e2d572d4bb822ced80a9daf56 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Wed, 22 Nov 2023 15:24:37 +0100 Subject: [PATCH 59/88] remove AI block parsings --- SearchProviders/chatgpt.json | 2 +- SearchProviders/preloaded.json | 2 +- swirl/mixers/mixer.py | 74 +++++++++++++++++----------------- swirl/processors/generic.py | 10 ++--- swirl/processors/mapping.py | 14 +++---- swirl/processors/rag.py | 2 +- swirl_server/settings.py | 2 +- 7 files changed, 53 insertions(+), 53 deletions(-) diff --git a/SearchProviders/chatgpt.json b/SearchProviders/chatgpt.json index 81148a887..4b9118a8f 100644 --- a/SearchProviders/chatgpt.json +++ b/SearchProviders/chatgpt.json @@ -14,7 +14,7 @@ "CosineRelevancyResultProcessor" ], "response_mappings": "", - "result_mappings": "BLOCK=ai_summary", + "result_mappings": "", "results_per_query": 10, "credentials": "your-openai-API-key-here", "tags": [ diff --git a/SearchProviders/preloaded.json b/SearchProviders/preloaded.json index 3780de685..dd7088a8d 100644 --- 
a/SearchProviders/preloaded.json +++ b/SearchProviders/preloaded.json @@ -114,7 +114,7 @@ "CosineRelevancyResultProcessor" ], "response_mappings": "", - "result_mappings": "BLOCK=ai_summary", + "result_mappings": "", "results_per_query": 10, "credentials": "your-OpenAI-API-key-here", "tags": [ diff --git a/swirl/mixers/mixer.py b/swirl/mixers/mixer.py index 0caf4af9a..0dadcb702 100644 --- a/swirl/mixers/mixer.py +++ b/swirl/mixers/mixer.py @@ -100,9 +100,9 @@ def __init__(self, search_id, results_requested, page, explain=False, provider=N self.mix_wrapper['info'][result.searchprovider]['query_to_provider'] = result.query_to_provider self.mix_wrapper['info'][result.searchprovider]['query_processors'] = result.query_processors self.mix_wrapper['info'][result.searchprovider]['result_processors'] = result.result_processors - if result.json_results: - if 'result_block' in result.json_results[0]: - self.mix_wrapper['info'][result.searchprovider]['result_block'] = result.json_results[0]['result_block'] + # if result.json_results: + # if 'result_block' in result.json_results[0]: + # self.mix_wrapper['info'][result.searchprovider]['result_block'] = result.json_results[0]['result_block'] self.mix_wrapper['info'][result.searchprovider]['search_time'] = result.time if self.search.messages: @@ -201,48 +201,48 @@ def finalize(self): if 'swirl_score' in result: del result['swirl_score'] # end if - if 'result_block' in result: - block_name = result['result_block'] - del result['result_block'] - if block_name in block_dict: - block_count = block_count + 1 - result['swirl_rank'] = block_count - block_dict[block_name].append(result) - else: - block_count = 1 - result['swirl_rank'] = block_count - block_dict[block_name] = [result] - # end if - peek_search_provider = result.get('searchprovider', None) - if peek_search_provider and self.mix_wrapper['info'].get(peek_search_provider, None): - del self.mix_wrapper['info'][result['searchprovider']] - # end if - else: - result['swirl_rank'] = mixed_result_number - mixed_results.append(result) - mixed_result_number = mixed_result_number + 1 + # if 'result_block' in result: + # block_name = result['result_block'] + # del result['result_block'] + # if block_name in block_dict: + # block_count = block_count + 1 + # result['swirl_rank'] = block_count + # block_dict[block_name].append(result) + # else: + # block_count = 1 + # result['swirl_rank'] = block_count + # block_dict[block_name] = [result] + # # end if + # peek_search_provider = result.get('searchprovider', None) + # if peek_search_provider and self.mix_wrapper['info'].get(peek_search_provider, None): + # del self.mix_wrapper['info'][result['searchprovider']] + # # end if + # else: + result['swirl_rank'] = mixed_result_number + mixed_results.append(result) + mixed_result_number = mixed_result_number + 1 # end if # end for # block results - self.mix_wrapper['info']['results']['result_blocks'] = [] + # self.mix_wrapper['info']['results']['result_blocks'] = [] # default block, if specified in settings - if settings.SWIRL_DEFAULT_RESULT_BLOCK: - self.mix_wrapper['info']['results']['result_blocks'].append(settings.SWIRL_DEFAULT_RESULT_BLOCK) - self.mix_wrapper[settings.SWIRL_DEFAULT_RESULT_BLOCK] = [] + # if settings.SWIRL_DEFAULT_RESULT_BLOCK: + # self.mix_wrapper['info']['results']['result_blocks'].append(settings.SWIRL_DEFAULT_RESULT_BLOCK) + # self.mix_wrapper[settings.SWIRL_DEFAULT_RESULT_BLOCK] = [] # blocks specified by provider(s) - moved_to_block = 0 - for block in block_dict: - self.mix_wrapper[block] = 
block_dict[block] - moved_to_block = moved_to_block + len(block_dict[block]) - if not block in self.mix_wrapper['info']['results']['result_blocks']: - self.mix_wrapper['info']['results']['result_blocks'].append(block) - if moved_to_block > 0: - self.mix_wrapper['info']['results']['retrieved_total'] = self.found - moved_to_block - if self.mix_wrapper['info']['results']['retrieved_total'] < 0: - self.warning("Block count exceeds result count") + # moved_to_block = 0 + # for block in block_dict: + # self.mix_wrapper[block] = block_dict[block] + # moved_to_block = moved_to_block + len(block_dict[block]) + # if not block in self.mix_wrapper['info']['results']['result_blocks']: + # self.mix_wrapper['info']['results']['result_blocks'].append(block) + # if moved_to_block > 0: + # self.mix_wrapper['info']['results']['retrieved_total'] = self.found - moved_to_block + # if self.mix_wrapper['info']['results']['retrieved_total'] < 0: + # self.warning("Block count exceeds result count") # extract the page of mixed results self.mixed_results = mixed_results diff --git a/swirl/processors/generic.py b/swirl/processors/generic.py index 34edf7f01..46074ba83 100644 --- a/swirl/processors/generic.py +++ b/swirl/processors/generic.py @@ -44,13 +44,13 @@ def process(self): use_payload = True file_system = False - result_block = None + # result_block = None if 'NO_PAYLOAD' in self.provider.result_mappings: self.warning(f"NO_PAYLOAD is not supported by GenericResultProcessor, ignoring") if 'FILE_SYSTEM' in self.provider.result_mappings: file_system = True - if 'BLOCK' in self.provider.result_mappings: - result_block = get_mappings_dict(self.provider.result_mappings)['BLOCK'] + # if 'BLOCK' in self.provider.result_mappings: + # result_block = get_mappings_dict(self.provider.result_mappings)['BLOCK'] list_results = [] result_number = 1 @@ -81,8 +81,8 @@ def process(self): if file_system: swirl_result['_relevancy_model'] = 'FILE_SYSTEM' - if result_block: - swirl_result['result_block'] = result_block + # if result_block: + # swirl_result['result_block'] = result_block # try to find a title, if none provided if swirl_result['title'] == "": diff --git a/swirl/processors/mapping.py b/swirl/processors/mapping.py index 9d8af17a0..2dc016c55 100644 --- a/swirl/processors/mapping.py +++ b/swirl/processors/mapping.py @@ -49,7 +49,7 @@ def process(self): list_results = [] provider_query_term_results = [] - result_block = "" + # result_block = "" json_types = [str,int,float,list,dict] use_payload = True @@ -88,11 +88,11 @@ def process(self): # control codez if swirl_key.isupper(): # to do: check the result mappings list??? 
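Because the hunks above branch on `swirl_key.isupper()`, it helps to see the convention they encode: a result_mappings string mixes upper-case control codes (like the BLOCK directive being removed here) with lower-case field mappings. A rough standalone sketch of that split (`parse_result_mappings` is an illustrative helper, not the actual Swirl parser):

```
# Rough sketch of the result_mappings convention -- illustrative only.
def parse_result_mappings(mappings):
    controls, fields = {}, {}
    for item in mappings.split(','):
        item = item.strip()
        if '=' in item:
            key, value = item.split('=', 1)
            # Upper-case keys are control codes; everything else maps fields.
            (controls if key.isupper() else fields)[key] = value
        elif item:
            fields[item] = item   # bare field names pass through unchanged
    return controls, fields

controls, fields = parse_result_mappings('BLOCK=ai_summary,title=name,body=desc')
assert controls == {'BLOCK': 'ai_summary'}
assert fields == {'title': 'name', 'body': 'desc'}
```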
- if swirl_key == 'BLOCK': - result_block = source_key - else: + # if swirl_key == 'BLOCK': + # result_block = source_key + # else: # ignore for now - continue + continue # check for field list | source_field_list = [] if '|' in source_key: @@ -254,8 +254,8 @@ def process(self): # final assembly if payload: swirl_result['payload'] = payload - if result_block: - swirl_result['result_block'] = result_block + # if result_block: + # swirl_result['result_block'] = result_block # try to find a title, if none provided if swirl_result['title'] == "": if swirl_result['url']: diff --git a/swirl/processors/rag.py b/swirl/processors/rag.py index a92ec5baf..8f86e6343 100644 --- a/swirl/processors/rag.py +++ b/swirl/processors/rag.py @@ -248,7 +248,7 @@ def background_process(self): rag_result['author'] = 'ChatGPT' rag_result['searchprovider'] = 'ChatGPT' rag_result['searchprovider_rank'] = 1 - rag_result['result_block'] = 'ai_summary' + # rag_result['result_block'] = 'ai_summary' rag_result['rag_query_items'] = [str(item['swirl_id']) for item in chosen_rag] result = Result.objects.create(owner=self.search.owner, search_id=self.search, provider_id=5, searchprovider='ChatGPT', query_string_to_provider=new_prompt_text[:256], query_to_provider='None', status='READY', retrieved=1, found=1, json_results=[rag_result], time=0.0) diff --git a/swirl_server/settings.py b/swirl_server/settings.py index 10072bef0..b4b632696 100644 --- a/swirl_server/settings.py +++ b/swirl_server/settings.py @@ -264,7 +264,7 @@ SWIRL_SEARCH_FORM_URL_DEF = '/swirl/search.html' SWIRL_SEARCH_FORM_URL = env('SWIRL_SEARCH_FORM_URL', default=SWIRL_SEARCH_FORM_URL_DEF) -SWIRL_DEFAULT_RESULT_BLOCK = 'ai_summary' +# SWIRL_DEFAULT_RESULT_BLOCK = 'ai_summary' OPENAI_API_KEY = env.get_value('OPENAI_API_KEY', default='') From 2992804de0af77f269dd4f535c523a2df62fc819 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Wed, 22 Nov 2023 15:30:14 +0100 Subject: [PATCH 60/88] remove AI block parsings --- swirl/connectors/mappings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swirl/connectors/mappings.py b/swirl/connectors/mappings.py index 21a2ebbf5..112d6c2d4 100644 --- a/swirl/connectors/mappings.py +++ b/swirl/connectors/mappings.py @@ -23,8 +23,8 @@ QUERY_MAPPING_KEYS = [ 'DATE_SORT', 'RELEVANCY_SORT', 'PAGE' ] RESPONSE_MAPPING_KEYS = [ 'FOUND', 'RETRIEVED', 'RESULTS', 'RESULT' ] -RESULT_MAPPING_KEYS = [ 'BLOCK' ] -MAPPING_KEYS = QUERY_MAPPING_KEYS + RESPONSE_MAPPING_KEYS + RESULT_MAPPING_KEYS +# RESULT_MAPPING_KEYS = [ 'BLOCK' ] +MAPPING_KEYS = QUERY_MAPPING_KEYS + RESPONSE_MAPPING_KEYS #+ RESULT_MAPPING_KEYS QUERY_MAPPING_VARIABLES = [ 'RESULT_INDEX', 'RESULT_ZERO_INDEX', 'PAGE_INDEX' ] RESULT_MAPPING_VARIABLES = [] From 85161b9623360942fd79f6e4bf3454739d71a2e5 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Wed, 29 Nov 2023 09:39:07 -0500 Subject: [PATCH 61/88] update automated testing workflow files --- .github/workflows/integration-api-tests.yml | 14 +++++--------- .github/workflows/smoke-tests.yml | 10 +++++----- .github/workflows/unit-tests.yml | 11 +++++++---- 3 files changed, 17 insertions(+), 18 deletions(-) diff --git a/.github/workflows/integration-api-tests.yml b/.github/workflows/integration-api-tests.yml index a5f9154bd..8ae4847ee 100644 --- a/.github/workflows/integration-api-tests.yml +++ b/.github/workflows/integration-api-tests.yml @@ -45,23 +45,19 @@ jobs: with: python-version: '3.11' cache: 'pip' # caching pip stuff - - name: Run Install Swirl + - name: Install Swirl run: ./install.sh - name: 
Update apt run: sudo apt -o Acquire::Retries=3 update - - name: upgrade Ubuntu to latest patches + - name: Upgrade Ubuntu to latest patches run: sudo apt upgrade -y - - name: stop update-notifier which restarts datetime - run: sudo systemctl stop update-notifier-download.timer - - name: disable update-notifier which restarts datetime - run: sudo systemctl disable update-notifier-download.timer - - name: Run Install redist-server + - name: Install redis-server run: sudo apt install -y redis-server - name: Set up Swirl run: python swirl.py setup - - name: Start up Swirl + - name: Start Swirl run: python swirl.py start - - name: Run Integrated API tests + - name: Run integrated API tests run: docker run --net=host -t swirlai/swirl-testing:latest-integrated-api sh -c "behave --tags=integrated_api" - name: Ensure artifacts directory exists and write branch and run_id again run: | diff --git a/.github/workflows/smoke-tests.yml b/.github/workflows/smoke-tests.yml index 86fad148c..c1c8c81cf 100644 --- a/.github/workflows/smoke-tests.yml +++ b/.github/workflows/smoke-tests.yml @@ -45,19 +45,19 @@ jobs: with: python-version: '3.11' cache: 'pip' # caching pip stuff - - name: Run Install Swirl + - name: Install Swirl run: ./install.sh - name: Update apt run: sudo apt -o Acquire::Retries=3 update - - name: upgrade Ubuntu to latest patches + - name: Upgrade Ubuntu to latest patches run: sudo apt upgrade -y - - name: Run Install redis-server + - name: Install redis-server run: sudo apt install -y redis-server - name: Set up Swirl run: python swirl.py setup - - name: Start up Swirl + - name: Start Swirl run: python swirl.py start - - name: Run Smoke tests + - name: Run smoke tests run: docker run --net=host -t swirlai/swirl-testing:latest-smoke-test sh -c "behave **/docker_container/*.feature --tags=docker_api_smoke" - name: Ensure artifacts directory exists and write branch and run_id again run: | diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index a1063cad6..cbb0a4004 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -2,11 +2,14 @@ name: UnitTests on: push: + # Only trigger on branches, not on tags + branches: + - 'main' + - 'develop' paths-ignore: - '.github/**' + - 'docs/**' - 'README.md' - # Only trigger on branches, not on tags - branches: 'develop' # Allows you to run this workflow manually from the Actions tab workflow_dispatch: @@ -23,9 +26,9 @@ jobs: with: python-version: '3.11' cache: 'pip' # caching pip stuff - - name: Run Install Swirl + - name: Install Swirl run: ./install.sh - - name: Run Install Tests Swirl + - name: Install pytest unit tests run: ./install-test.sh - name: Run pytest unit tests run: pytest From a381c4c7f042ab2c171c058f8d98c79c6087ac89 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Mon, 4 Dec 2023 11:12:55 -0500 Subject: [PATCH 62/88] add Internet Archive Items SP for next release --- SearchProviders/internet_archive.json | 33 ++++++++++++++++++++++++++ SearchProviders/preloaded.json | 33 ++++++++++++++++++++++++++ db.sqlite3.dist | Bin 299008 -> 299008 bytes 3 files changed, 66 insertions(+) create mode 100644 SearchProviders/internet_archive.json diff --git a/SearchProviders/internet_archive.json b/SearchProviders/internet_archive.json new file mode 100644 index 000000000..506cd0b2a --- /dev/null +++ b/SearchProviders/internet_archive.json @@ -0,0 +1,33 @@ +{ + "name": "Internet Archive Items - Archive.org", + "active": false, + "default": false, + "authenticator": "", + "connector": "RequestsGet", 
+ "url": "https://archive.org/services/search/v1/scrape?xvar=production&fields=*&count=100", + "query_template": "{url}&q={query_string}", + "post_query_template": {}, + "http_request_headers": { + "Content-Type": "application/json" + }, + "page_fetch_config_json": {}, + "query_processors": [ + "AdaptiveQueryProcessor" + ], + "query_mappings": "", + "result_grouping_field": "", + "result_processors": [ + "MappingResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "FOUND=total,RESULTS=items", + "result_mappings": "title=title,body=description,date_published=publicdate,author=creator,url='https://archive.org/details/{identifier}',curatenote,files_count,subject,format[*],collection[*],downloads,scanner,mediatype,primary_collection,NO_PAYLOAD", + "results_per_query": 10, + "credentials": "", + "eval_credentials": "", + "tags": [ + "InternetArchive", + "Library", + "Items" + ] +} \ No newline at end of file diff --git a/SearchProviders/preloaded.json b/SearchProviders/preloaded.json index dd7088a8d..375a92046 100644 --- a/SearchProviders/preloaded.json +++ b/SearchProviders/preloaded.json @@ -1228,5 +1228,38 @@ "Tasks", "Internal" ] + }, + { + "name": "Internet Archive Items - Archive.org", + "active": false, + "default": false, + "authenticator": "", + "connector": "RequestsGet", + "url": "https://archive.org/services/search/v1/scrape?xvar=production&fields=*&count=100", + "query_template": "{url}&q={query_string}", + "post_query_template": {}, + "http_request_headers": { + "Content-Type": "application/json" + }, + "page_fetch_config_json": {}, + "query_processors": [ + "AdaptiveQueryProcessor" + ], + "query_mappings": "", + "result_grouping_field": "", + "result_processors": [ + "MappingResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "FOUND=total,RESULTS=items", + "result_mappings": "title=title,body=description,date_published=publicdate,author=creator,url='https://archive.org/details/{identifier}',curatenote,files_count,subject,format[*],collection[*],downloads,scanner,mediatype,primary_collection,NO_PAYLOAD", + "results_per_query": 10, + "credentials": "", + "eval_credentials": "", + "tags": [ + "InternetArchive", + "Library", + "Items" + ] } ] diff --git a/db.sqlite3.dist b/db.sqlite3.dist index 131f24c823bb1bd99b23eefecb40517b42b834cd..25eab4dcef69f03ac1a13ba3698f8fecc3f0cefb 100644 GIT binary patch delta 4230 zcmai%Uu;v?9mnJNBqgj4g#2-0$KV7KN^p+P|8sA;6ig_XCJ>fTMvD+Q_Jx=@cCc^K zGP+XMp|(9D=XZX2>E^bVZf<)c8CiLAcqX#)(YgMN?V0{{s|tnU893Exs&M+BW1?`Xq|R!@Auy52wD>;Dr;b?f8)Ljft3 zoSoS9P|60z{b4y_m|E2V5zr#0^KXnQ?a^k~F9voF_`)*Db&?;k)&>&R?sy`AIUzWo zZsJSvggaUyX((ZqNt={qHhg�osA{>JgaIJ9z*f&N4ge~#uucy106+pE>z!fd^Ixi(Mb__! zr~CrSltmUxWdnd9!L8>~C;+fJLF>ni2YF|(tCSfc5eTHnG1&-vyBB_yL7C? 
zYIB#57LK3;a@VOcDegVK_-u7WpU>uIpFJ_VFprOm!p7Rl#p>$O;tD%ZnlqL+&Sx9e zo!XB7n2J?Pgi6`63@ zJJDaRb$`_sOLu+Ic`N!!`^~nmtiski_Aj;qk$HHWc#0yZQ9(56yAThFz5fOna^`x# zlaYZ`N3w0QE0^2U6-&lqJ-K2mcC903f3OXtxOYv75NE1?doEdXO_y*wvs^F_A3tHA z+YN^8{$6mwe!UlLOnMjLf@4i}``{uj(Vc7`xYG{38m6|QI!cJfC7QI~N`V)g_v2vQ zuOzAracDG`(EG}tCBTo`eR*O|g|3CgR77Iu`zZjU9u0HK^Wd?)IOL?9(Xz3o9!i(t)p z5rcnt(<4wd9K>;$$R&b0?@}=5HJLzFwMc}VUW-U)Sb*pKChJ(2ETp6`bP2bW2HBrI z4uJD_9SHBOl@M*(FliDFH?tRU=2eaVaEH3K4P%T148hLtw($o&!_fO;V`l z291VnvPGDky8#BBw`ahN*E|Z^C)x)xv#9A2EBfz8hO>Q6t+c|FwIdh1@JUI2Bms6DG!QTgxe7V9CS`D zffv1X$)HfPW7tJ%k>*88*qJ3&)lVAWcivnXR3n!RyACZPv5opbpHp52FL>Q&kP7ur zNQjXwLaEbx5&YFJB1B;2SD?-qbcxO#S^;qf&o)i`XJ%ci;3n961gtq|^#*@jWyC>M=}`rv+@-N4yKv0hj+_C3E* zs86reYo+y~S*}&b&zFr#sWCk1hy9@do9$T> zr0uPb18gVD;8FWePk=2~*VdORKV1l^zN|SN`_e|R~p$X z<&se~%PN8Tw|8fX>vgqAwWbz2U#=L9v(0=mjrFCAM$yciuhmxz=IMzunPRO{QQxTC z)o|=2J#q#-X216;kiG)v8lyPmR&eMNN}R6Oz}LK8hC{|Q U7B&L5h~~b2gU&NIzz6OB2Gc6tGXMYp delta 3798 zcmai%TWl0n7{_@HJ{W^%c9dn#Of;Lm?63dxUH;$s zX7=EzSqD$eI-XB%f3vfk+G1!qUCm_J9q9 zl1#YG>1Da_QCLW*=(^te1R8N$vKjYKy0_K-7f#HX>7@}VCPq5{wv=XiBViFl+?~pB zTgw%KU>}FL#IRB@$91?P?fD3%jHyaz6`zQ~kW*O6SMg6#3@L-WlB;6Wk+-^pl!9Ei zn>rr0q9UmzXO;GZ!fqgm1;_4Ry@ zc_#uG62xukL=k|MfbM7~vjI~=2=`zh;LE(1K>3=!wlW8qy z9H#0M*`Ct)Q#&T6AcX(vER+;6)L&PhNE^kT-P`_OV0y(xvOdw%Q`+KQol~%r6C6T$ zv8(sq8GYGQiX^=YlgZXg5B3i~G`LUhUo|{^p^*)6?Ra zRqI80@ARW=-^k$eWjVTHd1++LlJSu#w!Ex{R_9U|`f_K}7m22>#_KaKrmoZ-Pk1eBz-90I znSeWYbdd>%a3lqc+=G9J#9s4KkkwDNfF;RHu|A(DHkL{ajZOKcrk2u{rlwcwi=LhZ z3M_mo#+-5ycdAc>EN>g%y==`oZ_@(M>GiaNtuqIQSC>~T9-r8_YtN1yCCeQy1Y$9b4T$hc$4s*43LuvXB`X~yxQr79gk!J3LAUq9V$iA=c7s2| z8A}IJGqNO_aXQ{WSh^tE2F!WIdxAW?B5pD?AD_kr({ zq3VRg3Bj1giF`tsUV9%{Wk;e)P!>~@aD2k4{=5iYwfiU;!!pJ?>G(tu{SpFygwvCb zK#G!>Mg)W^eVTybu+`FGl1Ry;H3dZZa|!h@2NgR-YOVs0>7aCcBAIV>zjy6^0QBz) z@Kk8Tq(hJ?FXom71XoAQgB&z#Gc7B_!IdK+;c=b^gem{#_vzUsFc7X+Ih+e_rav+* z0m0RK=pCzoj6Pom$W|L8&BHoQZ3uyW*6rSjGN5|HQgAbDkaCFP<`bv(iG)Gcrz_wW zTZv81BsQ*3DAHGkz$IIRFppi2%JGTB`rTo0I4qKmdC6farax7}q12zwx9?)WF@ntQWsR8Sc76$6Cpt5k*kU$OZ%}Z9;Mo$`d(QF=0=t zpGQD~jo6_VZ8l4I(Z5fOfFq`eSpt#XAjc7i5NK#0&M3r@;@kgo6`|l#rN8 zk>eA}JlJOT_{27Fpf&|TkEE0eTl=R*2-dCJ!Ed%mFib_P$S0C$G6}BQA~U|`)ihcq zbPSOQYd8hI0k@Y&P?)4p#ka;MG*}+y`81&~?gG+oi!hUeV?rXwClu@Uz2GBT#JE{^ zv2OUp%$pJQ>c0Kpbz4cz(Ac{?D8na8_> Date: Mon, 4 Dec 2023 16:14:04 -0500 Subject: [PATCH 63/88] remove the settings --- swirl/views.py | 6 +----- swirl_server/settings.py | 2 -- 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/swirl/views.py b/swirl/views.py index 5ce394e08..b1d1d6612 100644 --- a/swirl/views.py +++ b/swirl/views.py @@ -46,9 +46,7 @@ from swirl.search import search as run_search SWIRL_EXPLAIN = getattr(settings, 'SWIRL_EXPLAIN', True) -SWIRL_RERUN_WAIT = getattr(settings, 'SWIRL_RERUN_WAIT', 8) SWIRL_SUBSCRIBE_WAIT = getattr(settings, 'SWIRL_SUBSCRIBE_WAIT', 20) -SWIRL_Q_WAIT = getattr(settings, 'SWIRL_Q_WAIT', 7) def remove_duplicates(my_list): new_list = [] @@ -422,7 +420,6 @@ def list(self, request): new_search.save() logger.info(f"{request.user} search_q {new_search.id}") # search_task.delay(new_search.id, Authenticator().get_session_data(request)) - # time.sleep(SWIRL_Q_WAIT) run_search(new_search.id, Authenticator().get_session_data(request),request=request) return redirect(f'/swirl/results?search_id={new_search.id}') @@ -497,7 +494,7 @@ def list(self, request): return Response(paginate(results, self.request), status=status.HTTP_200_OK) else: tries = tries + 1 - if tries > SWIRL_RERUN_WAIT: + if tries > 8: return Response(f'Timeout: {tries}, {new_search.status}!!', 
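                 # note: the literal 8 stands in for the removed SWIRL_RERUN_WAIT setting --
                 # eight passes of the 1-second sleep below before this timeout fires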
status=status.HTTP_500_INTERNAL_SERVER_ERROR) time.sleep(1) # end if @@ -532,7 +529,6 @@ def list(self, request): rerun_search.save() logger.info(f"{request.user} rerun {rerun_id}") # search_task.delay(rerun_search.id, Authenticator().get_session_data(request)) - # time.sleep(SWIRL_RERUN_WAIT) run_search(rerun_search.id, Authenticator().get_session_data(request), request=request) return redirect(f'/swirl/results?search_id={rerun_search.id}') # end if diff --git a/swirl_server/settings.py b/swirl_server/settings.py index b4b632696..c312bef78 100644 --- a/swirl_server/settings.py +++ b/swirl_server/settings.py @@ -228,8 +228,6 @@ SWIRL_DEFAULT_QUERY_LANGUAGE = 'english' SWIRL_TIMEOUT_DEFAULT = 10 SWIRL_TIMEOUT = env.int('SWIRL_TIMEOUT',default=SWIRL_TIMEOUT_DEFAULT) -SWIRL_Q_WAIT = 7 -SWIRL_RERUN_WAIT = 8 SWIRL_SUBSCRIBE_WAIT = 20 SWIRL_DEDUPE_FIELD = 'url' SWIRL_DEDUPE_SIMILARITY_MINIMUM = 0.95 From 1894d464163763c503c3054c407edbdcf544672e Mon Sep 17 00:00:00 2001 From: dnicodemus-la Date: Mon, 4 Dec 2023 16:18:57 -0500 Subject: [PATCH 64/88] remove unused code --- swirl/views.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/swirl/views.py b/swirl/views.py index b1d1d6612..fce4fd42f 100644 --- a/swirl/views.py +++ b/swirl/views.py @@ -493,9 +493,6 @@ def list(self, request): return return Response(paginate(results, self.request), status=status.HTTP_200_OK) else: - tries = tries + 1 - if tries > 8: - return Response(f'Timeout: {tries}, {new_search.status}!!', status=status.HTTP_500_INTERNAL_SERVER_ERROR) time.sleep(1) # end if # end if From 0080874b5de72ac562df68bffed9ca3a8184bc55 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Mon, 4 Dec 2023 16:37:06 -0500 Subject: [PATCH 65/88] add page-fetcher config to four PSE SPs + new db.dist for next release --- SearchProviders/google_pse.json | 95 ++++++++++++++++++++++++++++++- SearchProviders/preloaded.json | 97 +++++++++++++++++++++++++++++++- db.sqlite3.dist | Bin 299008 -> 299008 bytes 3 files changed, 189 insertions(+), 3 deletions(-) diff --git a/SearchProviders/google_pse.json b/SearchProviders/google_pse.json index 86aeb81bd..19dfe9f4e 100644 --- a/SearchProviders/google_pse.json +++ b/SearchProviders/google_pse.json @@ -6,6 +6,30 @@ "connector": "RequestsGet", "url": "https://www.googleapis.com/customsearch/v1", "query_template": "{url}?cx={cx}&key={key}&q={query_string}", + "post_query_template": "{}", + "http_request_headers": {}, + "page_fetch_config_json": { + "cache": "false", + "headers": { + "User-Agent": "Swirlbot/1.0 (+http://swirl.today)" + }, + "www.businesswire.com": { + "timeout": 60 + }, + "www.linkedin.com": { + "timeout": 5 + }, + "rs.linkedin.com": { + "timeout": 5 + }, + "uk.linkedin.com": { + "timeout": 5 + }, + "au.linkedin.com": { + "timeout": 5 + }, + "timeout": 30 + }, "query_processors": [ "AdaptiveQueryProcessor" ], @@ -30,6 +54,30 @@ "connector": "RequestsGet", "url": "https://www.googleapis.com/customsearch/v1", "query_template": "{url}?cx={cx}&key={key}&q={query_string}", + "post_query_template": "{}", + "http_request_headers": {}, + "page_fetch_config_json": { + "cache": "false", + "headers": { + "User-Agent": "Swirlbot/1.0 (+http://swirl.today)" + }, + "www.businesswire.com": { + "timeout": 60 + }, + "www.linkedin.com": { + "timeout": 5 + }, + "rs.linkedin.com": { + "timeout": 5 + }, + "uk.linkedin.com": { + "timeout": 5 + }, + "au.linkedin.com": { + "timeout": 5 + }, + "timeout": 30 + }, "query_processors": [ "AdaptiveQueryProcessor" ], @@ -54,6 +102,30 @@ "connector": "RequestsGet", "url": 
"https://www.googleapis.com/customsearch/v1", "query_template": "{url}?cx={cx}&key={key}&q={query_string}", + "post_query_template": "{}", + "http_request_headers": {}, + "page_fetch_config_json": { + "cache": "false", + "headers": { + "User-Agent": "Swirlbot/1.0 (+http://swirl.today)" + }, + "www.businesswire.com": { + "timeout": 60 + }, + "www.linkedin.com": { + "timeout": 5 + }, + "rs.linkedin.com": { + "timeout": 5 + }, + "uk.linkedin.com": { + "timeout": 5 + }, + "au.linkedin.com": { + "timeout": 5 + }, + "timeout": 30 + }, "query_processors": [ "AdaptiveQueryProcessor" ], @@ -110,7 +182,28 @@ "query_template": "{url}?cx={cx}&key={key}&q={query_string}", "post_query_template": {}, "http_request_headers": {}, - "page_fetch_config_json": {}, + "page_fetch_config_json": { + "cache": "false", + "headers": { + "User-Agent": "Swirlbot/1.0 (+http://swirl.today)" + }, + "www.businesswire.com": { + "timeout": 60 + }, + "www.linkedin.com": { + "timeout": 5 + }, + "rs.linkedin.com": { + "timeout": 5 + }, + "uk.linkedin.com": { + "timeout": 5 + }, + "au.linkedin.com": { + "timeout": 5 + }, + "timeout": 30 + }, "query_processors": [ "AdaptiveQueryProcessor" ], diff --git a/SearchProviders/preloaded.json b/SearchProviders/preloaded.json index 375a92046..4686bae83 100644 --- a/SearchProviders/preloaded.json +++ b/SearchProviders/preloaded.json @@ -6,6 +6,30 @@ "connector": "RequestsGet", "url": "https://www.googleapis.com/customsearch/v1", "query_template": "{url}?cx={cx}&key={key}&q={query_string}", + "post_query_template": "{}", + "http_request_headers": {}, + "page_fetch_config_json": { + "cache": "false", + "headers": { + "User-Agent": "Swirlbot/1.0 (+http://swirl.today)" + }, + "www.businesswire.com": { + "timeout": 60 + }, + "www.linkedin.com": { + "timeout": 5 + }, + "rs.linkedin.com": { + "timeout": 5 + }, + "uk.linkedin.com": { + "timeout": 5 + }, + "au.linkedin.com": { + "timeout": 5 + }, + "timeout": 30 + }, "query_processors": [ "AdaptiveQueryProcessor" ], @@ -31,6 +55,30 @@ "connector": "RequestsGet", "url": "https://www.googleapis.com/customsearch/v1", "query_template": "{url}?cx={cx}&key={key}&q={query_string}", + "post_query_template": "{}", + "http_request_headers": {}, + "page_fetch_config_json": { + "cache": "false", + "headers": { + "User-Agent": "Swirlbot/1.0 (+http://swirl.today)" + }, + "www.businesswire.com": { + "timeout": 60 + }, + "www.linkedin.com": { + "timeout": 5 + }, + "rs.linkedin.com": { + "timeout": 5 + }, + "uk.linkedin.com": { + "timeout": 5 + }, + "au.linkedin.com": { + "timeout": 5 + }, + "timeout": 30 + }, "query_processors": [ "AdaptiveQueryProcessor" ], @@ -56,6 +104,30 @@ "connector": "RequestsGet", "url": "https://www.googleapis.com/customsearch/v1", "query_template": "{url}?cx={cx}&key={key}&q={query_string}", + "post_query_template": "{}", + "http_request_headers": {}, + "page_fetch_config_json": { + "cache": "false", + "headers": { + "User-Agent": "Swirlbot/1.0 (+http://swirl.today)" + }, + "www.businesswire.com": { + "timeout": 60 + }, + "www.linkedin.com": { + "timeout": 5 + }, + "rs.linkedin.com": { + "timeout": 5 + }, + "uk.linkedin.com": { + "timeout": 5 + }, + "au.linkedin.com": { + "timeout": 5 + }, + "timeout": 30 + }, "query_processors": [ "AdaptiveQueryProcessor" ], @@ -1136,9 +1208,30 @@ "connector": "RequestsGet", "url": "https://www.googleapis.com/customsearch/v1", "query_template": "{url}?cx={cx}&key={key}&q={query_string}", - "post_query_template": {}, + "post_query_template": "{}", "http_request_headers": {}, - "page_fetch_config_json": 
{}, + "page_fetch_config_json": { + "cache": "false", + "headers": { + "User-Agent": "Swirlbot/1.0 (+http://swirl.today)" + }, + "www.businesswire.com": { + "timeout": 60 + }, + "www.linkedin.com": { + "timeout": 5 + }, + "rs.linkedin.com": { + "timeout": 5 + }, + "uk.linkedin.com": { + "timeout": 5 + }, + "au.linkedin.com": { + "timeout": 5 + }, + "timeout": 30 + }, "query_processors": [ "AdaptiveQueryProcessor" ], diff --git a/db.sqlite3.dist b/db.sqlite3.dist index 25eab4dcef69f03ac1a13ba3698f8fecc3f0cefb..839e166109ad6b72f3f3388a6052c29ab2b425af 100644 GIT binary patch delta 4512 zcmd52HUZX4YtSlHDmJ%ugiJgb8x^I+b}j@z;+2KMa2deT)+lw<3L0c z2#=tK{9K7!DT+cMeW9 z!F*he&f5E&z1M!6eO|b=_=Q`Ge|)}f5u+Qq+(?a0gd)Omuh*>%tuZO7bbnsg9bV%` zE4R9SXMI7JF;3~o%F-Aec54$2?v?trVGTlgfyr!RF5prYlw!m)rI}26wE!@ z@KjiVOCt7mpu(=q5g#=?SsPLylBsmNd#a(yU6n|N5aJj{(y0t`I*|;SMFeBgeLb-$ zx2A2Qn3BryptZAc#k?xh|&Rh_+ zt&Jqg{h(Qd#i+)$+7=aBLzB4U?rYf?UXz+C_w^PN+D>C=+)rA%LShbT3Rkp-k8m6+ zv^&=-!*a^u#Qj+-53g}_*L~O;HpQV#ad&H5XGl(IidpjV{53&^aev?TR7g&0fzfME zisnaIi3~_Z1t%q?vBZk*(IY1(CfzrZiLee+hPyYD9icU09Hiq{$xsxu5X$JEq`tj- z>jZm7_R5j{J@U-6l+X6foY=pspDo`@&z|nzb!v3z8{78p>)zeV$Ik6Ly+J)aJvOv+ zPO!tHy@NZB5B8ouKGg62dTRNj<2bP;k&=r`r>>rgzkAAha4)$s7K?4~z|Xc8{PZ>8 zr*{#4dY-_~R(O|`z{iykn~ha2TzI?WXl&LUtr_>;Ujo~o%=#^bEe!Sp#R%i;DO*oO zY91oIUl^$Kd+K1yYJW+WBxA$n-QDHoRm(;X)^}fAcG0Q2*!AMq<5h=u)L!1?{iXpn ztj=%1W$=sIIzSxaz1j)~y{>c6=HE#GvAjQzz{vA+b946C?DWLs`1JJL#F=sX@YKnU zjj884W+qOKPtDHYe?=~=OLYW-CnhG3j~|(sj1ZE6%$e!`DKdL}p2+k6byjAZ(b;)2 zQ6dHFIJxlBn3as}DrV|(CoCyj zk4WSIF;0l7x;FSM{#az&SHBFVs<7G@#vxYyaaDW8vkM=VJ}kK$2dwqKcz;?3+jA62 zTW~FyE~Y4paN`dzhY_nFkJlo0=qULB(cHhk682cRULtJ_fMk*K0mA+CYIqPYY)?4n zG$Oa99Ux5lLu+9;Z;rrAO5=rGY)%$Y!vA1BFw1*T1D&};gzez9Pa>59gbJ^E8zlU% z)xw21$#*Wr9N|PN1&EeC$3|zi?-}t9t%4SB#S(b6CWrx8gR#on@8}CGJmsz10^{Zq>YzV;CNAHXA#A{Rs|cp`<(#( z7X~^@3MV?mVCqL{28g%i2Gn~*LA8H=6I`{tD=A19qA4AwqizOi2Z;2#Z$Mo}+e7Uo z{+f39sdaM;l-9<}VpW%)I8@%Z@RibcO3ud1Fctsi{h$vv<$O{&-f*U!V#_iJC&b6F z^yFi&Y)2yt5xJ5@gbKpA+P{;Af8-NP*-CQ5B9#J!YybTLcrTvwSlN;qAtLG{izxrr zPPh@;o@07NZ;Mea1 zmFM5F6~UnJVqdceRo>l$!2Iv+hm-k!;MhWGe5ph!1&H+CItbjKJOGU$9V~pKB1m=Y z0C9ZO8M3@rw!r2br;bgHBTb}nh$v#R$pkoJ!&NPM{{)avAhj?93 z-3#IsvC0=K2Ftn@w861>1+x23@7fU<%Gsf9DU`z6C@Rz}f|oTG>QZmr7_9O8#^KYv zG1?ZCny70&i{Pk4>TnF3I0hH2 JdcTbE>pw-4S|vbH*Zfw$$V9{05Hfc!W z;9?4rMdIQ>MX1zXk^7^M1k|bsAwe6-7l}fs_)#QOASFfosH#S&jYLIJB@l?0n7do= z9kZ7|_s4wa%$f6;bLO7Aa@)BpxBc+TZR>~}BzRD0^;%8#afdTGwS`ctzfTV z$*I@ezP~-v6&h1Ky`|0}Q(kMP!&_?KZfOX`wDhjE@3S?S5`4P9s=+(9H(UErPP|hc zyKM!`wYYbqQ5Y|qxY4n-HB#V8Xj;sBvmIUDKqhMwK`ANb%H(V&YaNRd*Hzx-%pPkE z6Hci2LFUeA2c?y(WwF=n(CJNW$k{|lNsB#Y^4w}dTtmYNV%%#=xQ*D9i1=@vW6*IVk|6(vz|Es6K5Zf=Qj zDa=}HkF?jABGNn5Gia}2iLrONM_b#Og2CSVJ$o%N#!W0e-fNp+6ieZKu~*n~#CY!g zxR+UL)a8PD|L(PqLM5fHH`KSwmSfly@5NPXL=&pK-}UXb*ujiPPSuO*Rxg@q}GzFS90H&7#e1UQPOt zhmlBK5T;BG) zn~7+9f|X{m-h_n9U~Vf+RgZ&_R>al3cKb5XSOtIH1~hr;lK{-%sI0 zqlx4{{0GKhbj3DBj7l>{Yxk-U;s$Sy!M_qKu2E##5Edj(J|xn=J_hZ<+5_;{gnxiS zF;X;l3CoxmQWO$Zz9;_W1xWh~5;}qpDU7F9voe0~?cmeF64FmP{Km&A8SDhHyBtRA4L(SF4XA z*W9c)`7)t6j8%K^;V?8*4@Qwa$tQ3{cx?F^BRtuaW zF8Ti{nvh5qERVwX5+4`rb*E``&71XW>kcHYUoEvTq?7n84|3c%fr#lSA|_=OB9*^x z9<~Jg=HS_C#0A2f=(wV;5Hp*M(hsWg9BI={;_5jhrkmj4)?=_4n!|{s(ix}mt@; From cf76540ef081460841c4ae5172066eb0a977216a Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Tue, 5 Dec 2023 13:48:05 +0100 Subject: [PATCH 66/88] remove unnecessary --- swirl/views.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/swirl/views.py b/swirl/views.py index fce4fd42f..77b62c6d5 100644 --- a/swirl/views.py +++ b/swirl/views.py @@ -245,10 +245,6 @@ def post(self, 
request): class UpdateMicrosoftToken(APIView): def post(self, request): try: - headers = { - 'Authorization': request.headers['Authorization'], - 'Microsoft-Authorization': request.headers['Microsoft-Authorization'] - } # just return succcess,don't call the task # result = update_microsoft_token_task.delay(headers).get() result = { 'user': request.user.username, 'status': 'success' } @@ -461,10 +457,6 @@ def list(self, request): new_search.save() # log info logger.info(f"{request.user} search_qs {new_search.id}") - headers = { - 'Authorization': request.headers.get('Authorization', ''), - 'Microsoft-Authorization': request.headers.get('Microsoft-Authorization', '') - } res = run_search(new_search.id, Authenticator().get_session_data(request), request=request) if not res: logger.info(f'Search failed: {new_search.status}!!', status=status.HTTP_500_INTERNAL_SERVER_ERROR) From 61b6d94d2f55efe3e7ca5eb853ac1143c880d5a7 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Tue, 5 Dec 2023 13:48:32 +0100 Subject: [PATCH 67/88] run rag processor when rag param is presented --- swirl/consumers.py | 2 +- swirl/mixers/mixer.py | 68 +++++++++++++++++------------------ swirl/processors/processor.py | 4 +-- swirl/processors/rag.py | 11 +++--- swirl/search.py | 13 ++++--- swirl_server/settings.py | 2 +- 6 files changed, 51 insertions(+), 49 deletions(-) diff --git a/swirl/consumers.py b/swirl/consumers.py index e77b2177c..ba1015659 100644 --- a/swirl/consumers.py +++ b/swirl/consumers.py @@ -36,7 +36,7 @@ def get_rag_result(self, search_id, rag_query_items): return False except: pass - rag_processor = RAGPostResultProcessor(search_id=search_id, request_id='', is_socket_logic=True, rag_query_items=rag_query_items) + rag_processor = RAGPostResultProcessor(search_id=search_id, request_id='', should_get_results=True, rag_query_items=rag_query_items) instances[search_id] = rag_processor if rag_processor.validate(): result = rag_processor.process(should_return=True) diff --git a/swirl/mixers/mixer.py b/swirl/mixers/mixer.py index 0dadcb702..e65dbf762 100644 --- a/swirl/mixers/mixer.py +++ b/swirl/mixers/mixer.py @@ -201,48 +201,48 @@ def finalize(self): if 'swirl_score' in result: del result['swirl_score'] # end if - # if 'result_block' in result: - # block_name = result['result_block'] - # del result['result_block'] - # if block_name in block_dict: - # block_count = block_count + 1 - # result['swirl_rank'] = block_count - # block_dict[block_name].append(result) - # else: - # block_count = 1 - # result['swirl_rank'] = block_count - # block_dict[block_name] = [result] - # # end if - # peek_search_provider = result.get('searchprovider', None) - # if peek_search_provider and self.mix_wrapper['info'].get(peek_search_provider, None): - # del self.mix_wrapper['info'][result['searchprovider']] - # # end if - # else: - result['swirl_rank'] = mixed_result_number - mixed_results.append(result) - mixed_result_number = mixed_result_number + 1 + if 'result_block' in result: + block_name = result['result_block'] + del result['result_block'] + if block_name in block_dict: + block_count = block_count + 1 + result['swirl_rank'] = block_count + block_dict[block_name].append(result) + else: + block_count = 1 + result['swirl_rank'] = block_count + block_dict[block_name] = [result] + # end if + peek_search_provider = result.get('searchprovider', None) + if peek_search_provider and self.mix_wrapper['info'].get(peek_search_provider, None): + del self.mix_wrapper['info'][result['searchprovider']] + # end if + else: + 
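                # no result_block assigned: fall through and rank this result
                # sequentially in the main mixed list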
result['swirl_rank'] = mixed_result_number + mixed_results.append(result) + mixed_result_number = mixed_result_number + 1 # end if # end for # block results - # self.mix_wrapper['info']['results']['result_blocks'] = [] + self.mix_wrapper['info']['results']['result_blocks'] = [] # default block, if specified in settings - # if settings.SWIRL_DEFAULT_RESULT_BLOCK: - # self.mix_wrapper['info']['results']['result_blocks'].append(settings.SWIRL_DEFAULT_RESULT_BLOCK) - # self.mix_wrapper[settings.SWIRL_DEFAULT_RESULT_BLOCK] = [] + if settings.SWIRL_DEFAULT_RESULT_BLOCK: + self.mix_wrapper['info']['results']['result_blocks'].append(settings.SWIRL_DEFAULT_RESULT_BLOCK) + self.mix_wrapper[settings.SWIRL_DEFAULT_RESULT_BLOCK] = [] # blocks specified by provider(s) - # moved_to_block = 0 - # for block in block_dict: - # self.mix_wrapper[block] = block_dict[block] - # moved_to_block = moved_to_block + len(block_dict[block]) - # if not block in self.mix_wrapper['info']['results']['result_blocks']: - # self.mix_wrapper['info']['results']['result_blocks'].append(block) - # if moved_to_block > 0: - # self.mix_wrapper['info']['results']['retrieved_total'] = self.found - moved_to_block - # if self.mix_wrapper['info']['results']['retrieved_total'] < 0: - # self.warning("Block count exceeds result count") + moved_to_block = 0 + for block in block_dict: + self.mix_wrapper[block] = block_dict[block] + moved_to_block = moved_to_block + len(block_dict[block]) + if not block in self.mix_wrapper['info']['results']['result_blocks']: + self.mix_wrapper['info']['results']['result_blocks'].append(block) + if moved_to_block > 0: + self.mix_wrapper['info']['results']['retrieved_total'] = self.found - moved_to_block + if self.mix_wrapper['info']['results']['retrieved_total'] < 0: + self.warning("Block count exceeds result count") # extract the page of mixed results self.mixed_results = mixed_results diff --git a/swirl/processors/processor.py b/swirl/processors/processor.py index 0589c4ed1..58e07adf3 100644 --- a/swirl/processors/processor.py +++ b/swirl/processors/processor.py @@ -181,7 +181,7 @@ class PostResultProcessor(Processor): ######################################## - def __init__(self, search_id, request_id='', is_socket_logic=False, rag_query_items=False): + def __init__(self, search_id, request_id='', should_get_results=False, rag_query_items=False): self.search_id = search_id self.search = None @@ -194,7 +194,7 @@ def __init__(self, search_id, request_id='', is_socket_logic=False, rag_query_it self.error(f"Search not found {search_id}") return 0 self.search = Search.objects.get(id=search_id) - if self.search.status == 'POST_RESULT_PROCESSING' or self.search.status == 'RESCORING' or is_socket_logic: + if self.search.status == 'POST_RESULT_PROCESSING' or self.search.status == 'RESCORING' or should_get_results: # security review for 1.7 - OK, filtered by search ID self.results = Result.objects.filter(search_id=search_id) else: diff --git a/swirl/processors/rag.py b/swirl/processors/rag.py index a0f78b861..acd9bfe36 100644 --- a/swirl/processors/rag.py +++ b/swirl/processors/rag.py @@ -83,8 +83,8 @@ class RAGPostResultProcessor(PostResultProcessor): type="RAGPostResultProcessor" - def __init__(self, search_id, request_id='', is_socket_logic=False, rag_query_items=False): - super().__init__(search_id=search_id, request_id=request_id, is_socket_logic=is_socket_logic, rag_query_items=rag_query_items) + def __init__(self, search_id, request_id='', should_get_results=False, rag_query_items=False): + 
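        # should_get_results (renamed from is_socket_logic) makes the base class load
        # results even when the search is not in POST_RESULT_PROCESSING or RESCORING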
super().__init__(search_id=search_id, request_id=request_id, should_get_results=should_get_results, rag_query_items=rag_query_items) self.tasks = None self.stop_background_thread = False try: @@ -248,7 +248,8 @@ def background_process(self): rag_result['author'] = 'ChatGPT' rag_result['searchprovider'] = 'ChatGPT' rag_result['searchprovider_rank'] = 1 - # rag_result['result_block'] = 'ai_summary' + if settings.SWIRL_DEFAULT_RESULT_BLOCK: + rag_result['result_block'] = getattr(settings, 'SWIRL_DEFAULT_RESULT_BLOCK', 'ai_summary') rag_result['rag_query_items'] = [str(item['swirl_id']) for item in chosen_rag] result = Result.objects.create(owner=self.search.owner, search_id=self.search, provider_id=5, searchprovider='ChatGPT', query_string_to_provider=new_prompt_text[:256], query_to_provider='None', status='READY', retrieved=1, found=1, json_results=[rag_result], time=0.0) @@ -256,9 +257,7 @@ def background_process(self): return result - def process(self, should_return=False): - - logger.info('RUN RAG') + def process(self, should_return=True): # to do: remove foo:etc self.client = None if getattr(settings, 'OPENAI_API_KEY', None): diff --git a/swirl/search.py b/swirl/search.py index 148796a39..b2db4114d 100644 --- a/swirl/search.py +++ b/swirl/search.py @@ -255,8 +255,7 @@ def search(id, session=None, request=None): search.save() processor_list = search.post_result_processors - # add_to_post_processors_if_tag_in_request(request=request, processor_list=search.post_result_processors, - # tag="rag", processor_name="RAGPostResultProcessor") + for processor in processor_list: logger.debug(f"{module_name}: invoking processor: {processor}") try: @@ -305,6 +304,7 @@ def search(id, session=None, request=None): # log info retrieved = 0 + run_processor_if_tag_in_request(request=request, search=search, swqrx_logger=swqrx_logger, session=session, tag="rag", processor_name="RAGPostResultProcessor") for current_retrieved in results: if isinstance(current_retrieved, int) and current_retrieved > 0: retrieved = retrieved + current_retrieved @@ -312,12 +312,15 @@ def search(id, session=None, request=None): return True -def add_to_post_processors_if_tag_in_request(processor_list, tag, processor_name, request): +def run_processor_if_tag_in_request(tag, processor_name, request, search, swqrx_logger, session): if not (request and tag and processor_name): return try: - if tag in request.GET.keys() and processor_name not in processor_list: - processor_list.append(processor_name) + if tag in request.GET.keys() and processor_name: + processor = alloc_processor(processor=processor_name)(search_id=search.id, request_id=swqrx_logger.request_id, session=session, should_get_results=True) + if processor.validate(): + return processor.process() + return False except Exception as err: logger.warning(f'{err} while adding {processor_name} for {tag}') diff --git a/swirl_server/settings.py b/swirl_server/settings.py index c312bef78..6e266add5 100644 --- a/swirl_server/settings.py +++ b/swirl_server/settings.py @@ -262,7 +262,7 @@ SWIRL_SEARCH_FORM_URL_DEF = '/swirl/search.html' SWIRL_SEARCH_FORM_URL = env('SWIRL_SEARCH_FORM_URL', default=SWIRL_SEARCH_FORM_URL_DEF) -# SWIRL_DEFAULT_RESULT_BLOCK = 'ai_summary' +SWIRL_DEFAULT_RESULT_BLOCK = 'ai_summary' OPENAI_API_KEY = env.get_value('OPENAI_API_KEY', default='') From c9fe3b14218484d8dd64cf3d5402ef8118971953 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Tue, 5 Dec 2023 14:05:30 +0100 Subject: [PATCH 68/88] move websocket response timeout in config --- 
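Usage note: the UI reads this block from the served config, and a scripted client can
honor it the same way. A minimal Python sketch, not part of this change -- the helper
name and message payload are illustrative, it assumes the third-party `websockets`
package, and it assumes `timeout` is in milliseconds (see swirl/consumers.py for the
actual chatgpt-data protocol):

    import asyncio
    import json

    import websockets  # assumption: pip install websockets

    def load_websocket_config(path='swirl/static/api/config/default'):
        # illustrative helper: the served config file is JSON despite the bare filename
        with open(path) as f:
            return json.load(f)['webSocketConfig']

    async def fetch_rag_summary(search_id):
        cfg = load_websocket_config()
        try:
            async with websockets.connect(cfg['url']) as ws:
                # payload shape is illustrative, not the confirmed wire format
                await ws.send(json.dumps({'search_id': search_id}))
                # assumes the UI treats "timeout" as milliseconds
                return await asyncio.wait_for(ws.recv(), timeout=cfg['timeout'] / 1000)
        except asyncio.TimeoutError:
            return cfg.get('timeoutText', 'Timeout: No response from Generative AI.')

Keeping the timeout in the served config gives the UI and any scripted client one
knob instead of a hard-coded 30 seconds in two places.
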
swirl/static/api/config/default | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/swirl/static/api/config/default b/swirl/static/api/config/default index 64297c333..edc38f3bb 100644 --- a/swirl/static/api/config/default +++ b/swirl/static/api/config/default @@ -77,5 +77,9 @@ "authority": "", "redirectUri": "http://localhost:8000/spyglass/microsoft-callback" } + }, + "webSocketConfig": { + "url": "ws://localhost:8000/chatgpt-data", + "timeout": 30000 } } \ No newline at end of file From d1061539476ae26ef8b8ff5ec2fd4d4608c5afca Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Tue, 5 Dec 2023 18:56:36 +0100 Subject: [PATCH 69/88] move timeout text in config --- swirl/static/api/config/default | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/swirl/static/api/config/default b/swirl/static/api/config/default index edc38f3bb..f6036d02d 100644 --- a/swirl/static/api/config/default +++ b/swirl/static/api/config/default @@ -80,6 +80,7 @@ }, "webSocketConfig": { "url": "ws://localhost:8000/chatgpt-data", - "timeout": 30000 + "timeout": 30000, + "timeoutText": "Timeout: No response from Generative AI." } } \ No newline at end of file From f4fe34d5c5d338f9b37019a6f4e0454de0ca7de6 Mon Sep 17 00:00:00 2001 From: Dmitriy Kostenko Date: Tue, 5 Dec 2023 19:07:25 +0100 Subject: [PATCH 70/88] remove session object from post result processor --- swirl/search.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/swirl/search.py b/swirl/search.py index b2db4114d..e901ebf95 100644 --- a/swirl/search.py +++ b/swirl/search.py @@ -304,7 +304,7 @@ def search(id, session=None, request=None): # log info retrieved = 0 - run_processor_if_tag_in_request(request=request, search=search, swqrx_logger=swqrx_logger, session=session, tag="rag", processor_name="RAGPostResultProcessor") + run_processor_if_tag_in_request(request=request, search=search, swqrx_logger=swqrx_logger, tag="rag", processor_name="RAGPostResultProcessor") for current_retrieved in results: if isinstance(current_retrieved, int) and current_retrieved > 0: retrieved = retrieved + current_retrieved @@ -312,12 +312,12 @@ def search(id, session=None, request=None): return True -def run_processor_if_tag_in_request(tag, processor_name, request, search, swqrx_logger, session): +def run_processor_if_tag_in_request(tag, processor_name, request, search, swqrx_logger): if not (request and tag and processor_name): return try: if tag in request.GET.keys() and processor_name: - processor = alloc_processor(processor=processor_name)(search_id=search.id, request_id=swqrx_logger.request_id, session=session, should_get_results=True) + processor = alloc_processor(processor=processor_name)(search_id=search.id, request_id=swqrx_logger.request_id, should_get_results=True) if processor.validate(): return processor.process() return False From 2df692dc59275b7e407fdffe695d25bdc726c859 Mon Sep 17 00:00:00 2001 From: dnicodemus-la Date: Tue, 5 Dec 2023 15:30:54 -0500 Subject: [PATCH 71/88] change stem token error to include tokens --- swirl/processors/relevancy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/swirl/processors/relevancy.py b/swirl/processors/relevancy.py index 6e8badb4f..2e25a0bc8 100644 --- a/swirl/processors/relevancy.py +++ b/swirl/processors/relevancy.py @@ -60,7 +60,7 @@ def process(self): return parsed_query = parse_query(self.query_string, self.result_processor_json_feedback) if len(parsed_query.query_stemmed_target_list) != len(parsed_query.query_target_list): - 
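            # the stemmed and unstemmed token lists must stay index-aligned; a length
            # mismatch means hit highlighting can land on the wrong tokens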
self.error("len(query_stemmed_target_list) != len(query_target_list), highlighting errors may occur") + self.error(f"parsed query [un]stemmed mismatch : {parsed_query.query_stemmed_target_list} != {parsed_query.query_target_list}") list_query_lens.append(len(parsed_query.query_list)) for item in self.results: @@ -147,7 +147,7 @@ def process(self): result_field_stemmed = stem_string(result_field) result_field_stemmed_list = result_field_stemmed.strip().split() if len(result_field_list) != len(result_field_stemmed_list): - self.error("len(result_field_list) != len(result_field_stemmed_list), highlighting errors may occur") + self.error(f"result field [un]stemmed mismatch : {result_field_list} != {result_field_stemmed_list}") # NOT test for t in parsed_query.not_list: if t.lower() in (result_field.lower() for result_field in result_field_list): From 165035e6e0d14daf271b169b5cd34aaa0b370082 Mon Sep 17 00:00:00 2001 From: Joe Dinsmoor Date: Wed, 6 Dec 2023 09:40:07 -0500 Subject: [PATCH 72/88] Final pass for metrics update --- swirl/utils.py | 55 +++++++++++++++++++++++++++++++++++++++++++++++--- swirl/views.py | 3 +++ 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/swirl/utils.py b/swirl/utils.py index 88290a874..0c356107d 100644 --- a/swirl/utils.py +++ b/swirl/utils.py @@ -8,9 +8,13 @@ import logging as logger import json from pathlib import Path +import uuid import redis +import socket +import sqlite3 from django.core.paginator import Paginator from django.conf import settings +from django.contrib.auth import get_user_model from swirl.web_page import PageFetcherFactory from urllib.parse import urlparse @@ -18,6 +22,7 @@ SWIRL_MACHINE_AGENT = {'User-Agent': 'SwirlMachineServer/1.0 (+http://swirl.today)'} SWIRL_CONTAINER_AGENT = {'User-Agent': 'SwirlContainer/1.0 (+http://swirl.today)'} + ################################################## ################################################## @@ -29,6 +34,18 @@ def safe_urlparse(url): print(f'{err} while parsing URL') finally: return ret + +def provider_getter(): + try: + conn = sqlite3.connect('../db.sqlite3') + cur = conn.cursor() + cur.execute("select * from swirl_searchprovider") + res = cur.fetchall() + return res + except: + res = '' + return res + def is_running_celery_redis(): """ @@ -79,17 +96,49 @@ def is_running_in_docker(): return False def get_page_fetcher_or_none(url): + from swirl.views import SearchViewSet + + search_provider_count = provider_getter() + user = get_user_model() + user_list = user.objects.all() + user_count = len(user_list) + hostname = socket.gethostname() + domain_name = socket.gethostbyaddr() headers = SWIRL_CONTAINER_AGENT if is_running_in_docker() else SWIRL_MACHINE_AGENT - - if (pf := PageFetcherFactory.alloc_page_fetcher(url=url, options= { + """ + info is a tuple with 5 elements. 
+ info[0] : number of search providers + info[1] : number of search objects + info[2] : number of django users + info[3] : hostname + info[4] : domain name + """ + info = [ + len(search_provider_count), + SearchViewSet.report(), + user_count, + hostname, + domain_name[0] + ] + newurl = url_merger(url, info) + if (pf := PageFetcherFactory.alloc_page_fetcher(url=newurl, options= { "cache": "false", - "headers":headers + "headers":headers, })): return pf else: logger.info(f"No fetcher for {url}") return None + +def url_merger(url, info): + data = '' + for inf in info: + data = data.join("info=") + data = data.join(inf) + data = data.join("&") + url = url.join(data) + return url def get_url_details(request): if request: diff --git a/swirl/views.py b/swirl/views.py index 77b62c6d5..cf54bf741 100644 --- a/swirl/views.py +++ b/swirl/views.py @@ -373,6 +373,9 @@ class SearchViewSet(viewsets.ModelViewSet): serializer_class = SearchSerializer authentication_classes = [SessionAuthentication, BasicAuthentication] + def report(self): + return self.queryset + def list(self, request): # check permissions if not request.user.has_perm('swirl.view_search'): From ed103884d6cd4acbc77a5a01891fdf4ac67dfb11 Mon Sep 17 00:00:00 2001 From: Joe Dinsmoor Date: Wed, 6 Dec 2023 10:18:08 -0500 Subject: [PATCH 73/88] Update fallback options for search providers --- swirl/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/swirl/utils.py b/swirl/utils.py index 0c356107d..f90e5ae48 100644 --- a/swirl/utils.py +++ b/swirl/utils.py @@ -12,6 +12,7 @@ import redis import socket import sqlite3 +import glob from django.core.paginator import Paginator from django.conf import settings from django.contrib.auth import get_user_model @@ -43,8 +44,12 @@ def provider_getter(): res = cur.fetchall() return res except: - res = '' - return res + try: + res = len(glob.glob1('../SearchProviders/', "*.json")) + return res + except: + res = '' + return res def is_running_celery_redis(): From 9f2ca4c93cffaea4480dc60b6c550f534ad94b04 Mon Sep 17 00:00:00 2001 From: Joe Dinsmoor Date: Wed, 6 Dec 2023 10:35:47 -0500 Subject: [PATCH 74/88] update removing double len() call --- swirl/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swirl/utils.py b/swirl/utils.py index f90e5ae48..a44cfc0fc 100644 --- a/swirl/utils.py +++ b/swirl/utils.py @@ -45,7 +45,7 @@ def provider_getter(): return res except: try: - res = len(glob.glob1('../SearchProviders/', "*.json")) + res = glob.glob1('../SearchProviders/', "*.json") return res except: res = '' From 5da181de2f6e85bc33e7c582acbec4729c6bbd45 Mon Sep 17 00:00:00 2001 From: Sid Date: Thu, 7 Dec 2023 12:36:34 -0500 Subject: [PATCH 75/88] MongoDB connector --- SearchProviders/movies_mongodb.json | 31 +++++++ requirements.txt | 3 + swirl/connectors/__init__.py | 2 +- swirl/connectors/mongodb.py | 131 ++++++++++++++++++++++++++++ swirl/models.py | 1 + 5 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 SearchProviders/movies_mongodb.json create mode 100644 swirl/connectors/mongodb.py diff --git a/SearchProviders/movies_mongodb.json b/SearchProviders/movies_mongodb.json new file mode 100644 index 000000000..a3d18e17f --- /dev/null +++ b/SearchProviders/movies_mongodb.json @@ -0,0 +1,31 @@ +{ + "name": "Netflix Movies - MongoDB", + "active": true, + "default": false, + "authenticator": "", + "connector": "MongoDB", + "url": "sample_mflix:movies", + "query_template": "{'$text': {'$search': '{query_string}'}}", + "post_query_template": {}, + 
"http_request_headers": {}, + "page_fetch_config_json": {}, + "query_processors": [ + "AdaptiveQueryProcessor" + ], + "query_mappings": "NOT_CHAR=-", + "result_grouping_field": "", + "result_processors": [ + "MappingResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "", + "result_mappings": "title=name,body=fullplot,date_published=year,date_published_display=year,author=directors[*],url=poster,genres[*],rated,runtime,languages[*],cast[*],writers[*],awards.text,imdb.rating,tomatoes.viewer.rating,tomatoes.critic.rating,NO_PAYLOAD", + "results_per_query": 10, + "credentials": "", + "eval_credentials": "", + "tags": [ + "Movies", + "MongoDB", + "Internal" + ] +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 6adb50f0a..413cd3cf2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -31,3 +31,6 @@ channels channels-redis tika drf-yasg +pymongo + + diff --git a/swirl/connectors/__init__.py b/swirl/connectors/__init__.py index 723dd13a1..47fc7d951 100644 --- a/swirl/connectors/__init__.py +++ b/swirl/connectors/__init__.py @@ -15,7 +15,7 @@ from swirl.connectors.microsoft_graph import M365OutlookCalendar from swirl.connectors.microsoft_graph import M365SharePointSites from swirl.connectors.microsoft_graph import MicrosoftTeams - +from swirl.connectors.mongodb import MongoDB # uncomment the line below to enable PostgreSQL # from swirl.connectors.postgresql import PostgreSQL diff --git a/swirl/connectors/mongodb.py b/swirl/connectors/mongodb.py new file mode 100644 index 000000000..7e22b1933 --- /dev/null +++ b/swirl/connectors/mongodb.py @@ -0,0 +1,131 @@ +''' +@author: Sid Probstein +@contact: sid@swirl.today +''' + +from sys import path +from os import environ + +from pymongo.mongo_client import MongoClient +from pymongo.server_api import ServerApi +import json + +import django + +from swirl.utils import swirl_setdir +path.append(swirl_setdir()) # path to settings.py file +environ.setdefault('DJANGO_SETTINGS_MODULE', 'swirl_server.settings') +django.setup() + +from celery.utils.log import get_task_logger +from logging import DEBUG +logger = get_task_logger(__name__) +# logger.setLevel(DEBUG) + +from swirl.connectors.db_connector import DBConnector +from swirl.connectors.utils import bind_query_mappings + +######################################## +######################################## + +class MongoDB(DBConnector): + + type = "MongoDB" + + ######################################## + + def construct_query(self): + + logger.debug(f"{self}: construct_query()") + + self.count_query = None + + query_to_provider = bind_query_mappings(self.provider.query_template, self.provider.query_mappings) + if '{query_string}' in query_to_provider: + # add escaped \\" around each word so the search is an "All" instead of "Any" :\ + mongo_query = '' + for term in self.query_string_to_provider.split(): + if term.startswith('-'): + mongo_query = mongo_query + term + ' ' + else: + mongo_query = mongo_query + '\\"' + term + '\\"' + ' ' + query_to_provider = query_to_provider.replace('{query_string}', mongo_query.strip()) + + try: + # convert string to json + query_to_provider = query_to_provider.replace("'",'"') + self.query_to_provider = json.loads(query_to_provider) + except Exception as err: + self.error(f"Error converting query to JSON: {err}") + self.status = 'ERR' + + return + + ######################################## + + def validate_query(self, session=None): + + logger.debug(f"{self}: validate_query()") + if self.status == 'ERR': + return 
False + + return True + + ######################################## + + def execute_search(self, session=None): + + logger.debug(f"{self}: execute_search()") + + # connect to the db + config = self.provider.url.split(':') + if len(config) != 2: + self.error(f'Invalid configuration: {config}') + self.status = 'ERR_INVALID_CONFIG' + return + + mongo_uri = self.provider.credentials + database_name = config[0] + collection_name = config[1] + + try: + client = MongoClient(mongo_uri, server_api=ServerApi('1')) + db = client[database_name] + collection = db[collection_name] + # warning: query to provider is a json object + found = collection.count_documents(self.query_to_provider) + + except Exception as err: + self.error(f"{err} connecting to {self.type}") + return + + logger.debug(f"{self}: count {found}") + + if found == 0: + self.message(f"Retrieved 0 of 0 results from: {self.provider.name}") + self.status = 'READY' + self.found = 0 + self.retrieved = 0 + return + # end if + + # now run the actual query + try: + if self.search.sort.lower() == 'date': + if 'DATE_SORT' in self.query_mappings: + results = collection.find(self.query_to_provider).sort(self.query_mappings['DATE_SORT'], -1).limit(self.provider.results_per_query) + else: + self.warning("Date sort requested, but `DATE_SORT` missing from `query_mappings`, ignoring") + results = collection.find(self.query_to_provider).limit(self.provider.results_per_query) + else: + results = collection.find(self.query_to_provider).limit(self.provider.results_per_query) + self.response = list(results) + except Exception as err: + self.error(f"{err} connecting to {self.type}") + finally: + client.close() + + self.found = found + self.retrieved = len(self.response) + return + diff --git a/swirl/models.py b/swirl/models.py index c01e9c2fb..2a55343c5 100644 --- a/swirl/models.py +++ b/swirl/models.py @@ -81,6 +81,7 @@ class SearchProvider(models.Model): ('M365OutlookCalendar', 'M365 Outlook Calendar'), ('M365SharePointSites', 'M365 SharePoint Sites'), ('MicrosoftTeams', 'Microsoft Teams'), + ('MongoDB', 'MongoDB') ] connector = models.CharField(max_length=200, default='RequestsGet', choices=CONNECTOR_CHOICES) url = models.CharField(max_length=2048, default=str, blank=True) From 6a897c0f8d00ae331ad67ab65f25ff740af1fd68 Mon Sep 17 00:00:00 2001 From: Sid Date: Thu, 7 Dec 2023 12:36:42 -0500 Subject: [PATCH 76/88] MongoDB connector --- requirements.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 413cd3cf2..f491941d5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -32,5 +32,3 @@ channels-redis tika drf-yasg pymongo - - From 55ceb65bbe8de7b2d7397d9da22e680c84057191 Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Thu, 7 Dec 2023 13:18:18 -0500 Subject: [PATCH 77/88] update ServiceNow Knowledge SP + clean db.dist --- SearchProviders/preloaded.json | 6 +++--- SearchProviders/servicenow.json | 6 +++--- db.sqlite3.dist | Bin 299008 -> 303104 bytes 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/SearchProviders/preloaded.json b/SearchProviders/preloaded.json index 4686bae83..1e55d63e7 100644 --- a/SearchProviders/preloaded.json +++ b/SearchProviders/preloaded.json @@ -1012,8 +1012,8 @@ "default": false, "authenticator": "", "connector": "RequestsGet", - "url": "https://.service-now.com/api/sn_km_api/knowledge/articles", - "query_template": "{url}?query={query_string}&fields=language,published,rating,author,kb_knowledge_base", + "url": 
"https://.service-now.com/api/sn_km_api/knowledge/articles?fields=active,article_type,author,kb_category,sys_created_on,helpful_count,kb_knowledge_base,published,rating,topic,sys_updated_on,sys_updated_by,sys_view_count", + "query_template": "{url}?query={query_string}", "post_query_template": "{}", "http_request_headers": { "Accept": "application/json" @@ -1028,7 +1028,7 @@ "CosineRelevancyResultProcessor" ], "response_mappings": "FOUND=result.meta.count,RESULTS=result.articles", - "result_mappings": "title=title,body=snippet,date_published=fields.published.value,author=fields.author.display_value,url='https://.service-now.com/kb_view.do?sysparm_article={number}',fields.kb_knowledge_base.display_value,fields.language.display_value,fields.rating.display_value,NO_PAYLOAD", + "result_mappings": "title=title,body=snippet,date_published=fields.published.value,author=fields.author.display_value,url='https://.service-now.com/kb_view.do?sysparm_article={number}',fields.active.display_value,fields.article_type.display_value,fields.kb_category.display_value,fields.sys_created_on.display_value,fields.helpful_count.value,fields.kb_knowledge_base.display_value,fields.rating.value,fields.topic.display_value,fields.sys_updated_on.display_value,fields.sys_updated_by.display_value,fields.sys_view_count.value,NO_PAYLOAD", "results_per_query": 10, "credentials": "HTTPBasicAuth('servicenow-username','servicenow-password')", "eval_credentials": "", diff --git a/SearchProviders/servicenow.json b/SearchProviders/servicenow.json index b514c7ae3..df9b985b2 100644 --- a/SearchProviders/servicenow.json +++ b/SearchProviders/servicenow.json @@ -5,8 +5,8 @@ "default": false, "authenticator": "", "connector": "RequestsGet", - "url": "https://.service-now.com/api/sn_km_api/knowledge/articles", - "query_template": "{url}?query={query_string}&fields=language,published,rating,author,kb_knowledge_base", + "url": "https://.service-now.com/api/sn_km_api/knowledge/articles?fields=active,article_type,author,kb_category,sys_created_on,helpful_count,kb_knowledge_base,published,rating,topic,sys_updated_on,sys_updated_by,sys_view_count", + "query_template": "{url}?query={query_string}", "post_query_template": "{}", "http_request_headers": { "Accept": "application/json" @@ -21,7 +21,7 @@ "CosineRelevancyResultProcessor" ], "response_mappings": "FOUND=result.meta.count,RESULTS=result.articles", - "result_mappings": "title=title,body=snippet,date_published=fields.published.value,author=fields.author.display_value,url='https://.service-now.com/kb_view.do?sysparm_article={number}',fields.kb_knowledge_base.display_value,fields.language.display_value,fields.rating.display_value,NO_PAYLOAD", + "result_mappings": "title=title,body=snippet,date_published=fields.published.value,author=fields.author.display_value,url='https://.service-now.com/kb_view.do?sysparm_article={number}',fields.active.display_value,fields.article_type.display_value,fields.kb_category.display_value,fields.sys_created_on.display_value,fields.helpful_count.value,fields.kb_knowledge_base.display_value,fields.rating.value,fields.topic.display_value,fields.sys_updated_on.display_value,fields.sys_updated_by.display_value,fields.sys_view_count.value,NO_PAYLOAD", "results_per_query": 10, "credentials": "HTTPBasicAuth('servicenow-username','servicenow-password')", "eval_credentials": "", diff --git a/db.sqlite3.dist b/db.sqlite3.dist index 839e166109ad6b72f3f3388a6052c29ab2b425af..fb544e30b59209ac0b1a51318f792ab28fa9213d 100644 GIT binary patch delta 3911 
zcmZveU2I%O6~}k&adDbB>%<@Huk}}BQwQgI?tG7_73>hQNt31_!3~tktasPmjh*$z zyS8zlG))LbSQh%JrVOUBi(%nAW*vgO~INjwl zyR+Ab0&zr$^QF!sh9`X%6sJ3U#r;k*3Y`SoPA7eq>e|9EhBDpmv-z&xaCVt(BMK+l zbascFO?{F-?AseI3I+wUo%gzogJO_E%$(ib&_-5}P{HJ;Jmi2#Ir}hZ|O|ON=@jJ*(YHI-V{}ea?Aj zY4Gvl2wI$Agz)9^H_GulQM`5 z0%3)2ECFSd!Xt>gTJbe?y#xHZek2qC6&yv&JmS828ML|Au7L#=Pl0sEA+{u?h(&}L z5YAO~0DLyIDQpQ_Qae)N5yaKzQ{Xv6!8AaT3Xfo+&JBb20TD!1 zo(883A4CsEebx~Qj}WZ>z8@?Y3dE!yvQY|6IAm^V5+v1k4uG%3(c@HO%f+~7#ORsZH!nHLZtV$O&t(Q)LjHF8Vby?6f{cV5lLOG7*h@d z6NU^SZc{r-<`ITH*?^lk2%6RCIQT`~{>NJ+7L+!LI^lp|_g4rE2GUPYfd7WvQj6#d zN{H=leZnACI;oQ(XVlUIQCDD;!XwyyOV_UY+ad6=(BVCHkEapVEP zSUt!ACvSu9Is`BTSVxt ziEtheU7(3o@Z->Na0?U4^i{Pj+<@qCH@*xmhVCG4>8nfPhyx4=k>0H8bibDY`_;QI z1G}#Cburi*dtt}qWMgB~Q&%obwoNuIG`-sJ+OzJy3}Cfna|`MhN7O|U5bjNvR`EUSVsrS>`XRO&Cf3_ua8#N zD>K>Uy#AV-St^Yd@{8rUmBpFt(n_i7{cxeQw6>Vf&F5!kGnM>kd1ZF7SSjRlqsy6U zu{1wgT`Cu|{wgcwoL*(nU&dQC_@0}Y4V>j0V}U{IF|leH80b zX}fK9-M6)RK5Lu!zk>^}fN9sx0e<}wXxJIQ7i;R=(bKRQzjr&+d^7I;=<8s-R*J&X z6-A=TBp`&TU01OT{=JQBI(5rJIYFT?6sML&_AY|;7NHA(sB{nu?AJafRg2rrRo-bybK_O#GRw za#e&ux^JUV4TNWXnccEJ?hAqp^Wp}-oXz=LoAVBXLSb0!4B2dR-tiGqjG2F~`3DuN z^c2UOasQj<`zzluRwkvQ#k~1Wt3TLMa2Y2^7TZI1b~(cYl44uPt}bJgVTOyjkfGLs z;{{QWF#qY+UCv2ZB1wuZq2h9@>jmZ<%3^cK^0{S<3POt6kd5aGmF$|xFfyU-XfoN| z>|ZmL{Gel9r6~+IHDUgjxmcmB5i+IxyV{`RG?Wrie{b97xK%1i74u(dQ?8XkPW;c> zkZVN}a(`_*b*#ogBK!mG(zP;)xPPu)xYn?ISKIG*tipVN`ZSMSt3Xux$MeLsazUW~ zo4o6pAT|%T@~&qBVH3qe1vfS>k!h?qms=^K%KuBD$2pBjY6`uf1LtqIwDct7h!R*^ zSJS+?w={ZWa>jqZqsi4_u84o8qtmfcB}}IN*l~|*rCiBRI)A?Rp-KEC>tjR1JK3=l zh%7ujKRrCqk2mc@Kbh?xD3|sheq{JS@7_K#^6Y`xZvMpF$o|KUQ~cvn-=5u5d-`Ul z_V@dLI=b=p#|iWpWQ@=k%csk!i{&)-)YBm{E@^j0wjOlkEs^hsy|+!51gMdi|dUa48*^5oZ6AWR?gR z%7Xh^z?44N2!2r;*J7khNt3je2pk=0wA(>k22InM@+Np{i4w6RQ+ZH~X_-ffq*zvK zBSMs(-vIW<0%V@KJcQ}7s?#DOiN1C>*yAXScMK;gEFnlgvjzM$Rf&jskPw0r5)l#1 z^xQpQU+fI(*^y?r+N+3gs{eT}m`dLptD-PhBurFTf`WHm2aUngPXVBRwH-X4!Z9hw zf{H|`C6eu&D9!JBd?;Al1lsjjH|UO4nR!Y`Mq#z95n*A6v<5%rAglj@!EQ%uCey~5 zkbFxxv88U+Mmy-p1XQLGl4O=h6w2(8^+DzV(4f0|z;9#eWgZh)CJjSGSk(vmc7XrI zJmBU|Cs>MV4?;rV(pZ>uXLZmET4INDPe>D7LN`Q&@~FR!wnf+Vfp24-#68Z4!iic- zh}4~rfQycTAw_AzL~zd%4t1&s-f$E|GQ01qio!ihkkl9Z!8J#L8H7m<#63&Q4E=dG zxZx;}*?B0@D@(A_{~iD*V_6fPxpk#VsCh_~X>fag*^B5=ee-cZ9W95ZB&0eb;aP%s z&@~TQf{%tlPQN*5=BhHg!lPW8r(BZO5+-BXp#vc6XbHiJCuCB1mWW8J&AAJ)Q5GIj zf~iDz4dF2O`~+y!rauPL+ip*>@E}KsC$wZlg127>O?LjXdgVcIE7h8Cb!)I4!VZ*P75n!6Xf7g3qRV|}kUj}ogC8CNZR+sk*!1Y!j_K0O_(ExXtgE~*GCesrF*e$DtTaD4Gu~BN zn4dU$tZQoI(A3P)5l^qt^~7eJwY zbaJjdU3&IVF!M9;ZZI(l9*%oT5mHHONP3nmtuXz~C^!{sQR!j9u}ZktLP8+bXAXmZ zr7Bmj^bkb~CbVcokpA@qxa=HZrp9cBq#Rm8xxVlWC^#Ku?i};ds8$Hi5*gh)I&%bE zPv4n7U$bU){e0ao)8{Xg8s1L_t#5&1B{AkFie$?Ch^Q)IM3zQMpL+|G(!rH7c&wte z&vnR?#v&qY=@K*A_4>J4@M5VgkWuke?EQY z6sWs1ovT@0TmKdKFr70|uSy4VXTd;aPL#*7+0h9D9g$$M9W?3xo(1W2@aZDxsl=?z z6AB`dFwPNS;kH_@<%{5EY*#4HJe2HmO-QpPWa;KIqY3oFY4BYvRmw9$m9%FpLGb%M N6F$Eh-T-&h{vWrFX-5D6 From 03a796ba9e9f825edf4a0f7eee445e5dd31f45e9 Mon Sep 17 00:00:00 2001 From: Sid Date: Thu, 7 Dec 2023 15:07:09 -0500 Subject: [PATCH 78/88] Add DATE_SORT to MongoDB SP --- SearchProviders/movies_mongodb.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SearchProviders/movies_mongodb.json b/SearchProviders/movies_mongodb.json index a3d18e17f..55fd99452 100644 --- a/SearchProviders/movies_mongodb.json +++ b/SearchProviders/movies_mongodb.json @@ -12,7 +12,7 @@ "query_processors": [ "AdaptiveQueryProcessor" ], - "query_mappings": "NOT_CHAR=-", + "query_mappings": "DATE_SORT=year,NOT_CHAR=-", "result_grouping_field": "", "result_processors": [ "MappingResultProcessor", From 
df177336103a63fdac675554a2b597b7edfef3ce Mon Sep 17 00:00:00 2001 From: Erik Spears Date: Thu, 7 Dec 2023 15:21:29 -0500 Subject: [PATCH 79/88] update mongodb movies SP; add to preloaded.json; new db.dist --- SearchProviders/movies_mongodb.json | 8 +++---- SearchProviders/preloaded.json | 31 ++++++++++++++++++++++++++++ db.sqlite3.dist | Bin 299008 -> 303104 bytes 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/SearchProviders/movies_mongodb.json b/SearchProviders/movies_mongodb.json index 55fd99452..cb4b3e67c 100644 --- a/SearchProviders/movies_mongodb.json +++ b/SearchProviders/movies_mongodb.json @@ -1,6 +1,6 @@ { - "name": "Netflix Movies - MongoDB", - "active": true, + "name": "IMDB Movie Samples - MongoDB", + "active": false, "default": false, "authenticator": "", "connector": "MongoDB", @@ -19,9 +19,9 @@ "CosineRelevancyResultProcessor" ], "response_mappings": "", - "result_mappings": "title=name,body=fullplot,date_published=year,date_published_display=year,author=directors[*],url=poster,genres[*],rated,runtime,languages[*],cast[*],writers[*],awards.text,imdb.rating,tomatoes.viewer.rating,tomatoes.critic.rating,NO_PAYLOAD", + "result_mappings": "title=name,body=fullplot,date_published=released,date_published_display=year,author=directors[*],url=poster,lastupdated,genres[*],rated,runtime,languages[*],cast[*],writers[*],awards.text,imdb.rating,tomatoes.viewer.rating,tomatoes.critic.rating,NO_PAYLOAD", "results_per_query": 10, - "credentials": "", + "credentials": "mongodb+srv://:@.mongodb.net/?retryWrites=true&w=majority", "eval_credentials": "", "tags": [ "Movies", diff --git a/SearchProviders/preloaded.json b/SearchProviders/preloaded.json index 4686bae83..867d7a395 100644 --- a/SearchProviders/preloaded.json +++ b/SearchProviders/preloaded.json @@ -1354,5 +1354,36 @@ "Library", "Items" ] + }, + { + "name": "IMDB Movie Samples - MongoDB", + "active": false, + "default": false, + "authenticator": "", + "connector": "MongoDB", + "url": "sample_mflix:movies", + "query_template": "{'$text': {'$search': '{query_string}'}}", + "post_query_template": {}, + "http_request_headers": {}, + "page_fetch_config_json": {}, + "query_processors": [ + "AdaptiveQueryProcessor" + ], + "query_mappings": "DATE_SORT=year,NOT_CHAR=-", + "result_grouping_field": "", + "result_processors": [ + "MappingResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "", + "result_mappings": "title=name,body=fullplot,date_published=released,date_published_display=year,author=directors[*],url=poster,lastupdated,genres[*],rated,runtime,languages[*],cast[*],writers[*],awards.text,imdb.rating,tomatoes.viewer.rating,tomatoes.critic.rating,NO_PAYLOAD", + "results_per_query": 10, + "credentials": "mongodb+srv://:@.mongodb.net/?retryWrites=true&w=majority", + "eval_credentials": "", + "tags": [ + "Movies", + "MongoDB", + "Internal" + ] } ] diff --git a/db.sqlite3.dist b/db.sqlite3.dist index 839e166109ad6b72f3f3388a6052c29ab2b425af..ca6157eb5e75efe53a6a425368a3961f3589b48c 100644 GIT binary patch delta 4061 zcmZveU2I%O7012VaY!ocjg#2xueI04u^WF}-}#Bx|Rx)7Y4gU^#2=jlIc! 
z<=$O8j%(6X4N{>Gg)3!wpe;zeB2gmcK7a_~r7xhO@}a!6LP(GYkWeZ?B`O4ScZ0cO z-+j7!e)FF*bIzGF^Xm7HzWR%!Z*?{18X6jAz8#+o<=(s6e!PXpkv;*92~0Et3msDD zjpGA~6$*io^Yi1As+AKAo#xh=sDB1XnV<`UjSX<#X^FNu_gYV?DxyQ^JZep;D&3%H zVPIE9+EkUq6gh=9Ls99RQT%ehRL$0W{jP2Bn13xgVFai8oKjn-GZc-f3?Ut+;x%?N z8av|C2}4lgyc0d;U$~$G>cshL^pt-ZIS?5<6_e9+IP>j2DnmLmQr$K7_8vwAf<~&V z#-8kDkWoF=S!1xHM_C1COs3*$#|6a`Vw|#6drk4%4s{iXQchCQ8jE%AVFV+QYOAre z&K`eo1wx3kM5_$$3FoOyqo>x5#}1rvsI zugg%a6btBl&;?a1f($z?-9)wOI>pXnxA0rJ!M!4Wx0|b0Y;fW{=~lAJ4PA!7Au*(k z6~gnj8N;fTb8I+2jVUYRf>6Pnf5()SaXA5n&eaw^23mC3Czb^Lgu7PlkV2lm?TIn05(8JOJ@ZLk@ z@n1DJg7D$ch&%pn_**weYO(R!m#!HCVi{f?;W3EDbd?YtyMI0cesjRD5QNAU7Nqb9 zW$qu3fhOQra9u#62Pr(lU{xxpMC+nq$n6;kOV{xY9687y*>y&4*h>KP@@b zgc8?&9=xphzy?FIYwgPn)I7q4`}bjRLs1|qv&n)K9>K6$NP?*Qy)m#ILc58EnmijM zc>9u2NM2Y^moHpesNNj_-L5$f#(h~r8WB2W0UN6m4&6rx+*TBr$X`IXk>(Lbt5PxU zIt(Nf8PbIy0lPz*N2uY+`l^vhaKs&*06+6Z25CCSR9=PqVp%7Q`?OOD@E@OFEKi`k zMgyw1P8e}zRFgh$SVM+fXFvtk3Bg_-PP*YK@QJTPuqNw<;y_0pA<&JV2M-m6tXmEP zB8fGRAmN5m;F~@L(u9ylM+61JBNVz1r@>Q2!KoMbAcaQ^=6-k){7X^D1AyhrCm`4! zF}VBbEO^BiFrp#C3@Bd~dz(<5Am*L)CHJ#SfG9Gk3#tc{9??7^&+S*|!P`Cs)(np2 z>ubNGIw7Pg6~tX!1jiMbz}%pL3H1nvo(#F~e;Hg+WL!ry=saT0Bgm-N!NwJkP-K+J z3qGJ&u_lS3bNAyV@QSZQsirfXi-4n1CkFG%yTg4a4I)07e77P>f=)%9aO6JD0LPbF zqH&q`_21N4+A?}0gpvM<5qRE10 zvWEwhk4KE!{StML<-uPXPww_Tsxh7FI3TNaQf*0qR+ro0M?POvBTUHW#C~6O!nqsS z*rSjaxO5bx@Cd{1lUv}n?*^k9G??u0K?;xXTBqx%%5)C9AMAkd`m{_o5hQ}5S0}QG zeCIVV=i3p}bjampw%=QwAm)Dl9%%OYpt6Y|1Ou8#oz&|(>VEAF@QKe6)i{BW2g*ER zJiE_7eG?oG-;V=uFw`{D&@>}oFK3$W+`}WWhI5CL$wP;ly6{pRN2#+c?2gFTblwYiq_=vpp& zYa;K>Z*LC`l+9b^p^3Qsvdy%$x-P$mwr^HUYirpqTiL?e&d^S^F9l-FQ^|#M%ky)W z7ZO`?WcBKKFk3yXf#>dAtK@Q}T(LZwNtey#Qe`EVwb#u|!ZLGa+BP$L?8}*~UCO1m zYSWCSE9Lc~mB?f*bG2Nw>?^}dqZKQcC>8CpX^rO6cDYjWCeDnmnFY(N4X|qetxBPs z&6{`Rpu$=uy;f(dQsGUqX=UXEwc+W_w3V?nZ>yu(d}c+HdbzXFaxtGS7foA}vDh@N zedbj;MRxUn=9lJ{FD1Y7;#_j-xxAN+%*u#uZA^^c89(*x|5(K~twK6)o}Sn>mD0Ao zS+p{z&(utrm9f=a#oO9x?U_m|nC0cIP)F34F5>D0M~VxR8~>fEqUtVxlZGF*QLFiNX8L7n$#N?kvx5{@*?4{Lg)y z{Cd&Jj~1P6t{ktZs2Dhx?M;kd-ri7KO|uwv6V{DX04ax5=rgq)P82hSnEtA^*NqY- zxvr{Pn~r}0V}j6dXJrN2qN~&O`gGkgR|O%K`bu5iQ6a979PW6iqV=u{F^Y*kP_JAS zVUTWKY*mBBwSK7YtcmA>Aj3T0t|#i7^poj~!=O+Y=CegMna(VX>mbEgUretkB`yQS zF=t%=o?cP$S%xfm_U+mF0wmw7-g8@d{dF3##Sc_ zq99>{Q%W8gPs2hfvD6+GQ^E?L7@I3<7Os^ zZG`?e<7OrhwpFZbb!+1iVWy|%Mk%67|J2%D8bTRJCMnU`Hj;TbnPHG4N?@|GBE6(< zZ0G(x2lU5nbxx9Y;uX>7+p=eH;79jyFaobNu;(JBHRD zq4=e-{tfHKH}p@A4-M$=US0e!2Lb~I86))6#5)s-nTdJev-$<8oAw~87)aeu{hgXk z{gJwp`XzO9x_aWoR_=NrH`$+!&K3AA2& zQVWR^kt~q``PgbKphQwE%ViNE${cD3L$QijU{4V{tUg6ZBr&s3fDMkqCdRN=5fXyT z-cE2UQ7VQ7kPw1AJ%|Wq=HODWIkpB3!mZ74c~%kO)cn~6#^-g#9254egkD8RP+@-` zXfPix2X7^COvSOF!mA32WUF?M9a^(#xbSudXfZG6KrUty3zU$I!tyL5!l1c=!JCf4 zZppB)GFw>?5>5(Mku#cuo@A*w76_=U={{LVWZ~Q?P+Pck9Lz5q=ire-suwIYPj`ba zV;y1v6IlADJtEBek^Ua=AXWgkr#it>lxI>T1VVG55A?;X`cap}b%f%S^H# z{1uxS9&kn!_R2y+q{*%ZGme5GMX6`@c@Po~O(GB8breLh@P;pQg9jl&(##BiSx12x zguZ#nTup?&= zP+u`2q1;UG1+7lQ><+azbGbqUA(1e=J<_ITKlrV(bj;Y-yJX6~uFJX>k!bRaI0!zC zEfzjuA@_|?L>M=z!&XsR$G$=klfqk2NMxaX#G3QqDA*CZ+hkzxL+ZPCB0?fh3)a&v zbL}|zAf{CT$M$0KjCn+`H1D4R8)H=|`!;}xcqTj|jF`txgDOy}N(J@>5Z)x7?uCSe g`?k((Jp=AK>sX2M9jGe67(rOH>V&yB1s+ZQ2YY-Eo&W#< From 6b64156e8a7d9740c3874b1a5bd467cc5d69e812 Mon Sep 17 00:00:00 2001 From: Joe Dinsmoor Date: Fri, 8 Dec 2023 11:39:52 -0500 Subject: [PATCH 80/88] Some fixes for this issue, still raises int error --- .swirl | 1 + swirl.py | 6 +++++- swirl/utils.py | 14 +++++++------- 3 files changed, 13 insertions(+), 8 deletions(-) create mode 100644 .swirl diff --git a/.swirl b/.swirl new file mode 100644 index 000000000..dae670a02 --- 
From a0a84358573bb0c306d8033e11a71ec1a51f9b9d Mon Sep 17 00:00:00 2001
From: Erik Spears
Date: Fri, 8 Dec 2023 12:41:26 -0500
Subject: [PATCH 81/88] fix db.dist for the release

---
 db.sqlite3.dist | Bin 303104 -> 303104 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/db.sqlite3.dist b/db.sqlite3.dist
index ca6157eb5e75efe53a6a425368a3961f3589b48c..a309aebfe08be241351dcc5293957a612360a39d 100644
GIT binary patch
delta 3915
[binary delta data omitted]
delta 3745
[binary delta data omitted]
From 09b511ed0783ab09f76120bb6d7e0d7e47cd7ae5 Mon Sep 17 00:00:00 2001
From: dnicodemus-la
Date: Fri, 8 Dec 2023 13:09:16 -0500
Subject: [PATCH 82/88] remove .swirl, it should not be checked in

---
 .swirl | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 .swirl

diff --git a/.swirl b/.swirl
deleted file mode 100644
index dae670a02..000000000
--- a/.swirl
+++ /dev/null
@@ -1 +0,0 @@
-{"django": 90204, "celery-worker": 90231}
\ No newline at end of file

From 177b9126f9966963d7af2c05c6f95f762c141cc3 Mon Sep 17 00:00:00 2001
From: Erik Spears
Date: Fri, 8 Dec 2023 13:13:10 -0500
Subject: [PATCH 83/88] cherry-pick update db.dist from develop

---
 db.sqlite3.dist | Bin 303104 -> 303104 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/db.sqlite3.dist b/db.sqlite3.dist
index ca6157eb5e75efe53a6a425368a3961f3589b48c..a309aebfe08be241351dcc5293957a612360a39d 100644
GIT binary patch
delta 3915
[binary delta data omitted]
delta 3745
[binary delta data omitted]
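The .swirl file deleted in patch 82 is a small JSON registry mapping service names to the process IDs that swirl.py started, runtime state that belongs in .gitignore rather than in version control. Assuming the file is plain JSON as shown in the diff, here is a minimal sketch of reading such a registry and pruning stale entries; the helper names are hypothetical, not swirl.py's actual API:

import json
import os

PID_FILE = ".swirl"

def save_pids(pids):
    # e.g. {"django": 90204, "celery-worker": 90231}
    with open(PID_FILE, "w") as f:
        json.dump(pids, f)

def running_services():
    if not os.path.exists(PID_FILE):
        return {}
    with open(PID_FILE) as f:
        pids = json.load(f)
    alive = {}
    for name, pid in pids.items():
        try:
            os.kill(pid, 0)   # POSIX signal 0: existence check, sends nothing
            alive[name] = pid
        except OSError:       # ProcessLookupError is a subclass
            pass              # stale entry left over from a previous run
    return alive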
From 14112fa1b71ffdaaa5b35caea554bd39bce4eec0 Mon Sep 17 00:00:00 2001
From: dnicodemus-la
Date: Fri, 8 Dec 2023 14:14:51 -0500
Subject: [PATCH 84/88] Fixes for various issues

---
 swirl/utils.py | 64 ++++++++++++++++++++++++++++----------------------
 1 file changed, 36 insertions(+), 28 deletions(-)

diff --git a/swirl/utils.py b/swirl/utils.py
index cda680564..3f124a7d3 100644
--- a/swirl/utils.py
+++ b/swirl/utils.py
@@ -17,7 +17,7 @@
 from django.conf import settings
 from django.contrib.auth import get_user_model
 from swirl.web_page import PageFetcherFactory
-from urllib.parse import urlparse
+from urllib.parse import urlparse, quote

 SWIRL_MACHINE_AGENT = {'User-Agent': 'SwirlMachineServer/1.0 (+http://swirl.today)'}

@@ -35,22 +35,30 @@ def safe_urlparse(url):
         print(f'{err} while parsing URL')
     finally:
         return ret
-
+
 def provider_getter():
     try:
-        conn = sqlite3.connect('../db.sqlite3')
-        cur = conn.cursor()
-        cur.execute("select * from swirl_searchprovider")
-        res = cur.fetchall()
-        return res
-    except:
-        try:
-            res = glob.glob1('../SearchProviders/', "*.json")
-            return res
-        except:
-            res = ''
-            return res
+        conn = sqlite3.connect('./db.sqlite3')
+        with conn:
+            cur = conn.cursor()
+            cur.execute("SELECT COUNT(*) FROM swirl_searchprovider")
+            res = cur.fetchone()
+            return res[0]
+    except Exception as err:
+        print(f'DNDEBUG : {err} while getting provider count')
+        return -1 # not set
+
+def get_search_count():
+    try:
+        conn = sqlite3.connect('./db.sqlite3')
+        with conn:
+            cur = conn.cursor()
+            cur.execute("SELECT COUNT(*) FROM swirl_search")
+            res = cur.fetchone()
+            return res[0]
+    except Exception as err:
+        print(f'DNDEBUG : {err} while getting search count')
+        return -1 # not set

 def is_running_celery_redis():
     """
@@ -102,8 +110,9 @@ def is_running_in_docker():

 def get_page_fetcher_or_none(url):
     from swirl.views import SearchViewSet
-
+
     search_provider_count = provider_getter()
+    search_count = get_search_count()
     user = get_user_model()
     user_list = user.objects.all()
     user_count = len(user_list)
     hostname = socket.gethostname()
@@ -113,19 +122,18 @@ def get_page_fetcher_or_none(url):
     headers = SWIRL_CONTAINER_AGENT if is_running_in_docker() else SWIRL_MACHINE_AGENT

     """
-    info is a tuple with 5 elements. 
+    info is a tuple with 5 elements.
     info[0] : number of search providers
     info[1] : number of search objects
     info[2] : number of django users
-    info[3] : hostname 
+    info[3] : hostname
     info[4] : domain name
     """
     info = [
-        len(search_provider_count),
-        SearchViewSet.report(self=SearchViewSet),
-        user_count,
+        search_provider_count,
+        search_count,
+        user_count,
         hostname,
-        #domain_name[0]
     ]
     newurl = url_merger(url, info)
     if (pf := PageFetcherFactory.alloc_page_fetcher(url=newurl, options= {
@@ -136,13 +144,13 @@ def get_page_fetcher_or_none(url):
     else:
         logger.info(f"No fetcher for {url}")
         return None
-
-def url_merger(url, info):
-    data = ''
-    for inf in info:
-        info[inf] = "info=" + inf
-        data = data.join('\&', info)
-    url = url.join(data)
+
+def url_merger(base_url, info):
+    data = []
+    for i in info:
+        data.append("info=" + str(i))
+    url = f"{base_url}?{'&'.join(data)}"
+    print(f'DNDEBUG: info url {url}')
     return url

 def get_url_details(request):

From a621c05da232b17dd3ee2850053a7e32f52c7744 Mon Sep 17 00:00:00 2001
From: Joe Dinsmoor
Date: Fri, 8 Dec 2023 14:29:10 -0500
Subject: [PATCH 85/88] removed debug statements, fixed domain name query

---
 swirl.py       |  6 +-----
 swirl/utils.py | 10 +++++-----
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/swirl.py b/swirl.py
index 71a1c63ed..aa87b4560 100755
--- a/swirl.py
+++ b/swirl.py
@@ -42,11 +42,7 @@ def get_swirl_version():
         if match:
             version = match.group(1)
     except Exception as err:
-        if err != '[Errno 1] Unknown host':
-            print('Error while checking version; startup continuing')
-            print(err)
-        else:
-            pass
+        print('Error while checking version; startup continuing')
     finally:
         return version.strip()

diff --git a/swirl/utils.py b/swirl/utils.py
index 3f124a7d3..8d2482786 100644
--- a/swirl/utils.py
+++ b/swirl/utils.py
@@ -45,7 +45,7 @@ def provider_getter():
             res = cur.fetchone()
             return res[0]
     except Exception as err:
-        print(f'DNDEBUG : {err} while getting provider count')
+        print(f'{err} while getting provider count, defaulting to -1')
         return -1 # not set

 def get_search_count():
@@ -57,7 +57,7 @@
             res = cur.fetchone()
             return res[0]
     except Exception as err:
-        print(f'DNDEBUG : {err} while getting search count')
+        print(f'{err} while getting search count, defaulting to -1')
        return -1 # not set

 def is_running_celery_redis():
@@ -117,8 +117,7 @@ def get_page_fetcher_or_none(url):
     user_list = user.objects.all()
     user_count = len(user_list)
     hostname = socket.gethostname()
-    #ip_addr = socket.gethostbyname(hostname)
-    #domain_name = socket.gethostbyaddr(ip_addr)
+    domain_name = socket.getfqdn()
     headers = SWIRL_CONTAINER_AGENT if is_running_in_docker() else SWIRL_MACHINE_AGENT

     """
@@ -134,6 +133,7 @@ def get_page_fetcher_or_none(url):
         search_count,
         user_count,
         hostname,
+        domain_name
     ]
     newurl = url_merger(url, info)
     if (pf := PageFetcherFactory.alloc_page_fetcher(url=newurl, options= {
@@ -150,7 +150,7 @@ def url_merger(base_url, info):
     for i in info:
         data.append("info=" + str(i))
     url = f"{base_url}?{'&'.join(data)}"
-    print(f'DNDEBUG: info url {url}')
+    print(url)
     return url

 def get_url_details(request):
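One detail of the count helpers introduced in patch 84 and cleaned up in patch 85: sqlite3's connection context manager commits or rolls back the pending transaction, but it does not close the connection. A sketch of an equivalent helper that also closes the handle, keeping the query, path, and -1 fallback from the patch; wrapping the connect call in contextlib.closing is the only change:

import sqlite3
from contextlib import closing

def get_search_count():
    try:
        # closing() guarantees conn.close() on exit; "with conn:" alone
        # would only commit or roll back the transaction
        with closing(sqlite3.connect('./db.sqlite3')) as conn:
            cur = conn.cursor()
            cur.execute("SELECT COUNT(*) FROM swirl_search")
            return cur.fetchone()[0]
    except Exception as err:
        print(f'{err} while getting search count, defaulting to -1')
        return -1  # not set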
From 58f208b48c853814d8e8ecfc839217c518279a3f Mon Sep 17 00:00:00 2001
From: Joe Dinsmoor
Date: Fri, 8 Dec 2023 14:37:31 -0500
Subject: [PATCH 86/88] Removed domain name fetching, redundant

---
 swirl/utils.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/swirl/utils.py b/swirl/utils.py
index 8d2482786..6bad3a43e 100644
--- a/swirl/utils.py
+++ b/swirl/utils.py
@@ -117,7 +117,7 @@ def get_page_fetcher_or_none(url):
     user_list = user.objects.all()
     user_count = len(user_list)
     hostname = socket.gethostname()
-    domain_name = socket.getfqdn()
+    #domain_name = socket.getfqdn() for AD users
     headers = SWIRL_CONTAINER_AGENT if is_running_in_docker() else SWIRL_MACHINE_AGENT

     """
@@ -133,7 +133,6 @@ def get_page_fetcher_or_none(url):
         search_count,
         user_count,
         hostname,
-        domain_name
     ]
     newurl = url_merger(url, info)
     if (pf := PageFetcherFactory.alloc_page_fetcher(url=newurl, options= {

From 864b301ce8df98f37f691badb411faa8633e8470 Mon Sep 17 00:00:00 2001
From: Joe Dinsmoor
Date: Fri, 8 Dec 2023 14:37:51 -0500
Subject: [PATCH 87/88] completely removed above

---
 swirl/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/swirl/utils.py b/swirl/utils.py
index 6bad3a43e..d69028abb 100644
--- a/swirl/utils.py
+++ b/swirl/utils.py
@@ -117,7 +117,6 @@ def get_page_fetcher_or_none(url):
     user_list = user.objects.all()
     user_count = len(user_list)
     hostname = socket.gethostname()
-    #domain_name = socket.getfqdn() for AD users
     headers = SWIRL_CONTAINER_AGENT if is_running_in_docker() else SWIRL_MACHINE_AGENT

     """

From 05d71b880439ba264726b41aab710375c7e3c826 Mon Sep 17 00:00:00 2001
From: Joe Dinsmoor
Date: Fri, 8 Dec 2023 14:42:35 -0500
Subject: [PATCH 88/88] removed url debug print

---
 swirl/utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/swirl/utils.py b/swirl/utils.py
index d69028abb..b46c3a01d 100644
--- a/swirl/utils.py
+++ b/swirl/utils.py
@@ -148,7 +148,6 @@ def url_merger(base_url, info):
     for i in info:
         data.append("info=" + str(i))
     url = f"{base_url}?{'&'.join(data)}"
-    print(url)
     return url

 def get_url_details(request):
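As of patch 88, url_merger builds the query string by hand and appends each value unescaped; notably, patch 84 imports quote from urllib.parse but never uses it, so a value containing reserved characters would produce a malformed URL. A sketch of the same merge using urlencode, which percent-escapes each value and emits the repeated info= parameters; the example base URL is hypothetical:

from urllib.parse import urlencode

def url_merger(base_url, info):
    # a list of (key, value) pairs yields repeated info= parameters,
    # with each value escaped (spaces become '+', '&' becomes '%26', etc.)
    query = urlencode([("info", str(i)) for i in info])
    return f"{base_url}?{query}"

# url_merger("https://example.com/telemetry", [3, 12, 2, "my host"])
# -> "https://example.com/telemetry?info=3&info=12&info=2&info=my+host"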