diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml index 4592f4f08..dcf2bf721 100644 --- a/.github/workflows/docker-image.yml +++ b/.github/workflows/docker-image.yml @@ -14,8 +14,8 @@ jobs: - name: Login to Docker Hub uses: docker/login-action@v3 with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.SBS_DOCKER_USER }} + password: ${{ secrets.SBS_DOCKER_PAT }} - name: Build and Push Swirl Docker Image run: | BRANCH_NAME=${GITHUB_REF#refs/heads/} @@ -25,8 +25,8 @@ jobs: - name: Update the Docker Repo Description uses: peter-evans/dockerhub-description@v4 with: - username: ${{ secrets.DOCKER_USERNAME_X }} - password: ${{ secrets.DOCKER_PASSWORD_X }} + username: ${{ secrets.SBS_DOCKER_USER }} + password: ${{ secrets.SBS_DOCKER_PAT }} repository: swirlai/swirl-search - name: Upload Log Files if: always() diff --git a/.github/workflows/qa-suite.yml b/.github/workflows/qa-suite.yml index 657815720..2953cefd9 100644 --- a/.github/workflows/qa-suite.yml +++ b/.github/workflows/qa-suite.yml @@ -23,19 +23,19 @@ jobs: - name: Login to Docker Hub uses: docker/login-action@v3 with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.SBS_DOCKER_USER }} + password: ${{ secrets.SBS_DOCKER_PAT }} - name: Set Up Python uses: actions/setup-python@v5 with: - python-version: '3.12.5' + python-version: '3.12.6' cache: 'pip' - name: Install Swirl run: ./install.sh - name: Setup Swirl run: python swirl.py setup - name: Install the Latest Galaxy UI - run: ./install-ui.sh + run: ./install-ui.sh -p env: MSAL_CB_PORT: 8000 MSAL_HOST: localhost @@ -46,6 +46,11 @@ jobs: python swirl.py start env: ALLOWED_HOSTS: localhost,host.docker.internal + - name: Decode BigQuery token and create JSON file + run: | + echo "$BIGQUERY_TOKEN_BASE64" | base64 --decode > "${{ github.workspace }}/token.json" + env: + BIGQUERY_TOKEN_BASE64: ${{ secrets.QA_BIGQUERY_TOKEN_BASE64 }} - name: Run the QA Suite run: | echo "SWIRL_TEST_HOST=localhost" > .env.qa @@ -53,10 +58,18 @@ jobs: echo "MSAL_CB_PORT=8000" >> .env.qa echo "QA_ADMIN_PW=${{ secrets.QA_ADMIN_PW }}" >> .env.qa echo "QA_OPENAI_KEY=${{ secrets.QA_OPENAI_KEY }}" >> .env.qa + echo "QA_NLR_USERNAME=${{ secrets.QA_NLR_USERNAME }}" >> .env.qa + echo "QA_NLR_PASSWORD=${{ secrets.QA_NLR_PASSWORD }}" >> .env.qa + echo "QA_CRUNCHBASE_KEY=${{ secrets.QA_CRUNCHBASE_KEY }}" >> .env.qa + echo "QA_BLOCKCHAIN_KEY=${{ secrets.QA_BLOCKCHAIN_KEY }}" >> .env.qa + echo "QA_YOUTRACK_TOKEN=${{ secrets.QA_YOUTRACK_TOKEN }}" >> .env.qa + echo "QA_GITHUB_TOKEN=${{ secrets.QA_GITHUB_TOKEN }}" >> .env.qa + echo "BIGQUERY_TOKEN_PATH=${{ github.workspace }}/token.json" >> .env.qa + echo "QA_TRELLO_KEYS=${{ secrets.QA_TRELLO_KEYS }}" >> .env.qa echo "========" cat .env.qa echo "========" - docker run --net=host --env-file .env.qa -t swirlai/swirl-search-qa:automated-tests-master sh -c "behave --tags=qa_suite,community" + docker run --net=host --env-file .env.qa -t swirlai/swirl-search-qa:automated-tests-develop sh -c "behave --tags=qa_suite,community" - name: Upload Log Files if: always() uses: actions/upload-artifact@v4 diff --git a/.github/workflows/spell-checker.yml b/.github/workflows/spell-checker.yml index 3c61204c9..3a5786852 100644 --- a/.github/workflows/spell-checker.yml +++ b/.github/workflows/spell-checker.yml @@ -2,14 +2,8 @@ name: Check Spelling # Trigger to only run this workflow automatically on docs/ directory changes on: - push: - branches: 
- - "main" - paths: - - "docs/**" - pull_request: - # Run for all PRs to develop - means PR cannot merge until unit tests pass + # Run for all PRs to develop or main - means PR cannot merge until unit tests pass branches: - develop - main diff --git a/.github/workflows/test-build-pipeline.yml b/.github/workflows/test-build-pipeline.yml index a655acda7..c40154fa1 100644 --- a/.github/workflows/test-build-pipeline.yml +++ b/.github/workflows/test-build-pipeline.yml @@ -20,7 +20,7 @@ jobs: - name: Set Up Python uses: actions/setup-python@v5 with: - python-version: '3.12.5' + python-version: '3.12.6' cache: 'pip' - name: Install Swirl run: ./install.sh @@ -48,12 +48,12 @@ jobs: - name: Login to Docker Hub uses: docker/login-action@v3 with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.SBS_DOCKER_USER }} + password: ${{ secrets.SBS_DOCKER_PAT }} - name: Set Up Python uses: actions/setup-python@v5 with: - python-version: '3.12.5' + python-version: '3.12.6' cache: 'pip' - name: Install Swirl run: ./install.sh @@ -71,6 +71,11 @@ jobs: python swirl.py start env: ALLOWED_HOSTS: localhost,host.docker.internal + - name: Decode BigQuery token and create JSON file + run: | + echo "$BIGQUERY_TOKEN_BASE64" | base64 --decode > "${{ github.workspace }}/token.json" + env: + BIGQUERY_TOKEN_BASE64: ${{ secrets.QA_BIGQUERY_TOKEN_BASE64 }} - name: Run the QA Suite run: | echo "SWIRL_TEST_HOST=localhost" > .env.qa @@ -78,6 +83,14 @@ jobs: echo "MSAL_CB_PORT=8000" >> .env.qa echo "QA_ADMIN_PW=${{ secrets.QA_ADMIN_PW }}" >> .env.qa echo "QA_OPENAI_KEY=${{ secrets.QA_OPENAI_KEY }}" >> .env.qa + echo "QA_NLR_USERNAME=${{ secrets.QA_NLR_USERNAME }}" >> .env.qa + echo "QA_NLR_PASSWORD=${{ secrets.QA_NLR_PASSWORD }}" >> .env.qa + echo "QA_CRUNCHBASE_KEY=${{ secrets.QA_CRUNCHBASE_KEY }}" >> .env.qa + echo "QA_BLOCKCHAIN_KEY=${{ secrets.QA_BLOCKCHAIN_KEY }}" >> .env.qa + echo "QA_YOUTRACK_TOKEN=${{ secrets.QA_YOUTRACK_TOKEN }}" >> .env.qa + echo "QA_GITHUB_TOKEN=${{ secrets.QA_GITHUB_TOKEN }}" >> .env.qa + echo "BIGQUERY_TOKEN_PATH=${{ github.workspace }}/token.json" >> .env.qa + echo "QA_TRELLO_KEYS=${{ secrets.QA_TRELLO_KEYS }}" >> .env.qa echo "========" cat .env.qa echo "========" @@ -92,8 +105,8 @@ jobs: - name: Login to Docker Hub uses: docker/login-action@v3 with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.SBS_DOCKER_USER }} + password: ${{ secrets.SBS_DOCKER_PAT }} - name: Build and Push Swirl Docker Image run: | BRANCH_NAME=${GITHUB_REF#refs/heads/} @@ -103,8 +116,8 @@ jobs: - name: Update the Docker Repo Description uses: peter-evans/dockerhub-description@v4 with: - username: ${{ secrets.DOCKER_USERNAME_X }} - password: ${{ secrets.DOCKER_PASSWORD_X }} + username: ${{ secrets.SBS_DOCKER_USER }} + password: ${{ secrets.SBS_DOCKER_PAT }} repository: swirlai/swirl-search - name: Upload Log Files if: always() diff --git a/.github/workflows/testing-wip.yml b/.github/workflows/testing-wip.yml index 7ea047f2e..d7edfe608 100644 --- a/.github/workflows/testing-wip.yml +++ b/.github/workflows/testing-wip.yml @@ -36,12 +36,12 @@ jobs: - name: Login to Docker Hub uses: docker/login-action@v3 with: - username: ${{ secrets.DOCKER_USERNAME }} - password: ${{ secrets.DOCKER_PASSWORD }} + username: ${{ secrets.SBS_DOCKER_USER }} + password: ${{ secrets.SBS_DOCKER_PAT }} - name: Set Up Python uses: actions/setup-python@v5 with: - python-version: '3.12.5' + python-version: '3.12.6' cache: 'pip' - name: 
Install Swirl run: ./install.sh @@ -59,6 +59,11 @@ jobs: python swirl.py start env: ALLOWED_HOSTS: localhost,host.docker.internal + - name: Decode BigQuery token and create JSON file + run: | + echo "$BIGQUERY_TOKEN_BASE64" | base64 --decode > "${{ github.workspace }}/token.json" + env: + BIGQUERY_TOKEN_BASE64: ${{ secrets.QA_BIGQUERY_TOKEN_BASE64 }} - name: Run the QA Suite run: | echo "SWIRL_TEST_HOST=localhost" > .env.qa @@ -66,6 +71,14 @@ jobs: echo "MSAL_CB_PORT=8000" >> .env.qa echo "QA_ADMIN_PW=${{ secrets.QA_ADMIN_PW }}" >> .env.qa echo "QA_OPENAI_KEY=${{ secrets.QA_OPENAI_KEY }}" >> .env.qa + echo "QA_NLR_USERNAME=${{ secrets.QA_NLR_USERNAME }}" >> .env.qa + echo "QA_NLR_PASSWORD=${{ secrets.QA_NLR_PASSWORD }}" >> .env.qa + echo "QA_CRUNCHBASE_KEY=${{ secrets.QA_CRUNCHBASE_KEY }}" >> .env.qa + echo "QA_BLOCKCHAIN_KEY=${{ secrets.QA_BLOCKCHAIN_KEY }}" >> .env.qa + echo "QA_YOUTRACK_TOKEN=${{ secrets.QA_YOUTRACK_TOKEN }}" >> .env.qa + echo "QA_GITHUB_TOKEN=${{ secrets.QA_GITHUB_TOKEN }}" >> .env.qa + echo "BIGQUERY_TOKEN_PATH=${{ github.workspace }}/token.json" >> .env.qa + echo "QA_TRELLO_KEYS=${{ secrets.QA_TRELLO_KEYS }}" >> .env.qa echo "========" cat .env.qa echo "========" diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 5dd320254..a4176c1e5 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -31,7 +31,7 @@ jobs: - name: Set Up Python uses: actions/setup-python@v5 with: - python-version: '3.12.5' + python-version: '3.12.6' cache: 'pip' - name: Install Swirl run: ./install.sh diff --git a/.gitignore b/.gitignore index 12c60a4c6..677d4fe3d 100644 --- a/.gitignore +++ b/.gitignore @@ -130,7 +130,6 @@ dmypy.json /static -swirl/migrations/ # emacs *~ diff --git a/Dockerfile b/Dockerfile index 621ffe1ad..bc2d6bd33 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.12.5-slim-bookworm +FROM python:3.12.6-slim-bookworm # Update, upgrade and install packages in a single RUN to reduce layers RUN apt-get update && apt-get install -y \ diff --git a/SearchProviders/atlassian.json b/SearchProviders/atlassian.json index 736848249..73e050542 100644 --- a/SearchProviders/atlassian.json +++ b/SearchProviders/atlassian.json @@ -22,7 +22,7 @@ "tags": [ "Jira", "Atlassian", - "Internal" + "Dev" ] }, { @@ -48,7 +48,7 @@ "tags": [ "Confluence", "Atlassian", - "Internal" + "Dev" ] }, { diff --git a/SearchProviders/company_data_bigquery.json b/SearchProviders/company_data_bigquery.json new file mode 100644 index 000000000..9a8f5a96d --- /dev/null +++ b/SearchProviders/company_data_bigquery.json @@ -0,0 +1,31 @@ +{ + "name": "Company Data - BigQuery", + "active": false, + "default": false, + "connector": "BigQuery", + "url": "", + "query_template": "select {fields} from `{table}` where search({field1}, '{query_string}') or search({field2}, '{query_string}') or search({field3}, '{query_string}');", + "query_template_json": {}, + "post_query_template": {}, + "http_request_headers": {}, + "page_fetch_config_json": {}, + "query_processors": [ + "AdaptiveQueryProcessor" + ], + "query_mappings": "fields=*,sort_by_date=year_founded,table=company_dataset.company,field1=name,field2=domain,field3=locality", + "result_grouping_field": "", + "result_processors": [ + "MappingResultProcessor", + "CosineRelevancyResultProcessor" + ], + "response_mappings": "", + "result_mappings": "title=name,body='{name} was founded in {year_founded} and serves the {industry} industry. 
The company is located in {locality} and has approximately {current_employee_estimate} employees. The registered domain for this organization is: {domain}',url='https://www.{linkedin_url}',NO_PAYLOAD", + "results_per_query": 10, + "credentials": "/path/to/bigquery/token.json", + "eval_credentials": "", + "tags": [ + "Company", + "BigQuery", + "Internal" + ] +} diff --git a/SearchProviders/elasticsearch.json b/SearchProviders/elasticsearch.json index 00e4a5a49..57075b9bd 100644 --- a/SearchProviders/elasticsearch.json +++ b/SearchProviders/elasticsearch.json @@ -13,7 +13,7 @@ "MappingResultProcessor", "CosineRelevancyResultProcessor" ], - "result_mappings": "url=_source.url,date_published=_source.date_published,author=_source.author,title=_source.subject,body=_source.content,_source.to,NO_PAYLOAD", + "result_mappings": "url='https:///email/_doc/{_id}',date_published=_source.date_published,author=_source.author,title=_source.subject,body=_source.content,_source.to,NO_PAYLOAD", "credentials": "verify_certs=[True|False],ca_certs=/path/to/cert/file.crt,username:password", "tags": [ "Enron", diff --git a/SearchProviders/funding_db_bigquery.json b/SearchProviders/funding_db_bigquery.json deleted file mode 100644 index 769107056..000000000 --- a/SearchProviders/funding_db_bigquery.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "name": "Company Funding Records - BigQuery", - "active": false, - "default": false, - "connector": "BigQuery", - "query_template": "select {fields} from `{table}` where search({field1}, '{query_string}') or search({field2}, '{query_string}');", - "query_processors": [ - "AdaptiveQueryProcessor" - ], - "query_mappings": "fields=*,sort_by_date=fundedDate,table=funding.funding,field1=company,field2=city", - "result_processors": [ - "MappingResultProcessor", - "CosineRelevancyResultProcessor" - ], - "result_mappings": "title='{company}',body='{company} raised ${raisedamt} series {round} on {fundeddate}. 
The company is located in {city} {state} and has {numemps} employees.',url=id,date_published=fundeddate,NO_PAYLOAD", - "credentials": "/path/to/bigquery/token.json", - "tags": [ - "Company", - "BigQuery", - "Internal" - ] -} diff --git a/SearchProviders/github.json b/SearchProviders/github.json index 193179278..bf5f298bf 100644 --- a/SearchProviders/github.json +++ b/SearchProviders/github.json @@ -28,7 +28,7 @@ "GitHub", "Code", "Internal", - "Development" + "Dev" ] }, { @@ -60,7 +60,7 @@ "GitHub", "Issues", "Internal", - "Development" + "Dev" ] }, { @@ -92,7 +92,7 @@ "GitHub", "PullRequests", "Internal", - "Development" + "Dev" ] }, { @@ -124,7 +124,7 @@ "GitHub", "Commits", "Internal", - "Development" + "Dev" ] } ] diff --git a/SearchProviders/google.json b/SearchProviders/google.json index fe768419e..09972251e 100644 --- a/SearchProviders/google.json +++ b/SearchProviders/google.json @@ -152,7 +152,8 @@ "credentials": "key=AIzaSyDvVeE-L6nCC9u-TTGuhggvSmzhtiTHJsA", "eval_credentials": "", "tags": [ - "Swirl" + "Swirl", + "Dev" ] } ] diff --git a/SearchProviders/hacker_news.json b/SearchProviders/hacker_news.json index 880ebefce..fb9c25647 100644 --- a/SearchProviders/hacker_news.json +++ b/SearchProviders/hacker_news.json @@ -26,7 +26,7 @@ "tags": [ "HackerNews", "Stories", - "Development" + "Dev" ] }, { @@ -56,7 +56,7 @@ "tags": [ "HackerNews", "Comments", - "Development" + "Dev" ] } diff --git a/SearchProviders/microsoft.json b/SearchProviders/microsoft.json index 79dfe5138..a9f5918f4 100644 --- a/SearchProviders/microsoft.json +++ b/SearchProviders/microsoft.json @@ -13,8 +13,8 @@ "query_mappings": "NOT=true,NOT_CHAR=-", "result_grouping_field": "conversationId", "result_processors": [ - "MappingResultProcessor", "DedupeByFieldResultProcessor", + "MappingResultProcessor", "CosineRelevancyResultProcessor" ], "response_mappings": "", @@ -75,7 +75,7 @@ "CosineRelevancyResultProcessor" ], "response_mappings": "", - "result_mappings": "title=resource.name,body='{resource.name} - {summary}',date_published=resource.createdDateTime,url=resource.webUrl,author=resource.createdBy.user.displayName,resource.lastModifiedBy.user.displayName,resource.lastModifiedDateTime,FILE_SYSTEM,NO_PAYLOAD", + "result_mappings": "title=resource.name,body='{resource.name} - {summary}',date_published=resource.createdDateTime,url=resource.webUrl,author=resource.createdBy.user.displayName,resource.lastModifiedBy.user.displayName,resource.lastModifiedDateTime,NO_PAYLOAD", "results_per_query": 10, "credentials": "", "eval_credentials": "", diff --git a/SearchProviders/opensearch.json b/SearchProviders/opensearch.json index b20b68484..916397c4f 100644 --- a/SearchProviders/opensearch.json +++ b/SearchProviders/opensearch.json @@ -13,7 +13,7 @@ "MappingResultProcessor", "CosineRelevancyResultProcessor" ], - "result_mappings": "url=_source.url,date_published=_source.date_published,author=_source.author,title=_source.subject,body=_source.content,_source.to,NO_PAYLOAD", + "result_mappings": "url='https:///email/_doc/{_id}',date_published=_source.date_published,author=_source.author,title=_source.subject,body=_source.content,_source.to,NO_PAYLOAD", "credentials": "verify_certs=[True|False],ca_certs=/path/to/cert/file.crt,username:password", "tags": [ "Enron", diff --git a/SearchProviders/preloaded.json b/SearchProviders/preloaded.json index 694d9ef8e..07dc77cfd 100644 --- a/SearchProviders/preloaded.json +++ b/SearchProviders/preloaded.json @@ -155,7 +155,8 @@ "credentials": "key=AIzaSyDvVeE-L6nCC9u-TTGuhggvSmzhtiTHJsA", 
"eval_credentials": "", "tags": [ - "Swirl" + "Swirl", + "Dev" ] }, { @@ -326,28 +327,34 @@ ] }, { - "name": "Company Funding Records - BigQuery", + "name": "Company Data - BigQuery", + "description": "Searches info on 7 million companies worldwide ncluding Linkedin URL, company size, location, and number of employees. Search only with company name, domain or location. Supports many languages. Does not support NOT operator.", "active": false, "default": false, "connector": "BigQuery", "url": "", - "query_template": "select {fields} from `{table}` where search({field1}, '{query_string}') or search({field2}, '{query_string}');", + "query_template": "select {fields} from `{table}` where search({field1}, '{query_string}') or search({field2}, '{query_string}') or search({field3}, '{query_string}');", + "query_template_json": {}, + "post_query_template": {}, + "http_request_headers": {}, + "page_fetch_config_json": {}, "query_processors": [ "AdaptiveQueryProcessor" ], - "query_mappings": "fields=*,sort_by_date=fundedDate,table=funding.funding,field1=company,field2=city", + "query_mappings": "fields=*,sort_by_date=year_founded,table=company_dataset.company,field1=name,field2=domain,field3=locality", + "result_grouping_field": "", "result_processors": [ "MappingResultProcessor", "CosineRelevancyResultProcessor" ], "response_mappings": "", - "result_mappings": "title='{company}',body='{company} raised ${raisedamt} series {round} on {fundeddate}. The company is located in {city} {state} and has {numemps} employees.',url=id,date_published=fundeddate,NO_PAYLOAD", + "result_mappings": "title=name,body='{name} was founded in {year_founded} and serves the {industry} industry. The company is located in {locality} and has approximately {current_employee_estimate} employees. 
The registered domain for this organization is: {domain}',url='https://www.{linkedin_url}',NO_PAYLOAD", "results_per_query": 10, "credentials": "/path/to/bigquery/token.json", + "eval_credentials": "", "tags": [ "Company", - "BigQuery", - "Internal" + "BigQuery" ] }, { @@ -416,7 +423,7 @@ "CosineRelevancyResultProcessor" ], "response_mappings": "", - "result_mappings": "url=_source.url,date_published=_source.date_published,author=_source.author,title=_source.subject,body=_source.content,_source.to,NO_PAYLOAD", + "result_mappings": "url='https:///email/_doc/{_id}',date_published=_source.date_published,author=_source.author,title=_source.subject,body=_source.content,_source.to,NO_PAYLOAD", "results_per_query": 10, "credentials": "verify_certs=[True|False],ca_certs=/path/to/cert/file.crt,admin:admin", "tags": [ @@ -441,7 +448,7 @@ "CosineRelevancyResultProcessor" ], "response_mappings": "", - "result_mappings": "url=_source.url,date_published=_source.date_published,author=_source.author,title=_source.subject,body=_source.content,_source.to,NO_PAYLOAD", + "result_mappings": "url='https:///email/_doc/{_id}',date_published=_source.date_published,author=_source.author,title=_source.subject,body=_source.content,_source.to,NO_PAYLOAD", "results_per_query": 10, "credentials": "verify_certs=[True|False],ca_certs=/path/to/cert/file.crt,username:password", "tags": [ @@ -474,7 +481,7 @@ "tags": [ "Articles", "YouTrack", - "Internal" + "Dev" ] }, { @@ -501,7 +508,7 @@ "tags": [ "Issues", "YouTrack", - "Internal" + "Dev" ] }, { @@ -527,7 +534,7 @@ "tags": [ "Confluence", "Atlassian", - "Internal" + "Dev" ] }, { @@ -553,7 +560,7 @@ "tags": [ "Jira", "Atlassian", - "Internal" + "Dev" ] }, { @@ -572,8 +579,8 @@ "query_mappings": "NOT=true,NOT_CHAR=-", "result_grouping_field": "conversationId", "result_processors": [ - "MappingResultProcessor", "DedupeByFieldResultProcessor", + "MappingResultProcessor", "CosineRelevancyResultProcessor" ], "response_mappings": "", @@ -637,7 +644,7 @@ "CosineRelevancyResultProcessor" ], "response_mappings": "", - "result_mappings": "title=resource.name,body='{resource.name} - {summary}',date_published=resource.createdDateTime,url=resource.webUrl,author=resource.createdBy.user.displayName,resource.lastModifiedBy.user.displayName,resource.lastModifiedDateTime,FILE_SYSTEM,NO_PAYLOAD", + "result_mappings": "title=resource.name,body='{resource.name} - {summary}',date_published=resource.createdDateTime,url=resource.webUrl,author=resource.createdBy.user.displayName,resource.lastModifiedBy.user.displayName,resource.lastModifiedDateTime,NO_PAYLOAD", "results_per_query": 10, "credentials": "", "eval_credentials": "", @@ -734,7 +741,7 @@ "GitHub", "Code", "Internal", - "Development" + "Dev" ] }, { @@ -766,7 +773,7 @@ "GitHub", "Issues", "Internal", - "Development" + "Dev" ] }, { @@ -798,7 +805,7 @@ "GitHub", "PullRequests", "Internal", - "Development" + "Dev" ] }, { @@ -830,7 +837,7 @@ "GitHub", "Commits", "Internal", - "Development" + "Dev" ] }, { @@ -961,7 +968,7 @@ "tags": [ "HackerNews", "Stories", - "Development" + "Dev" ] }, { @@ -990,7 +997,7 @@ "tags": [ "HackerNews", "Comments", - "Development" + "Dev" ] }, { diff --git a/SearchProviders/youtrack.json b/SearchProviders/youtrack.json index 6d3f6a9cb..b7c81972b 100644 --- a/SearchProviders/youtrack.json +++ b/SearchProviders/youtrack.json @@ -23,7 +23,7 @@ "tags": [ "Issues", "YouTrack", - "Internal" + "Dev" ] }, { @@ -50,7 +50,7 @@ "tags": [ "Articles", "YouTrack", - "Internal" + "Dev" ] } ] diff --git a/db.sqlite3.dist 
b/db.sqlite3.dist index 73efcb3c7..7982bdf13 100644 Binary files a/db.sqlite3.dist and b/db.sqlite3.dist differ diff --git a/requirements.txt b/requirements.txt index e89f5c4a7..2066c1eca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,104 +1,103 @@ -## The following requirements were added by pip freeze: amqp==5.2.0 annotated-types==0.7.0 -anyio==4.5.0 +anyio==4.6.2.post1 asgiref==3.8.1 asn1crypto==1.5.1 attrs==24.2.0 autobahn==24.4.2 Automat==24.8.1 -azure-core==1.31.0 +azure-core==1.32.0 beautifulsoup4==4.12.3 -billiard==4.2.0 +billiard==4.2.1 blis==0.7.11 bs4==0.0.2 cachetools==5.5.0 catalogue==2.0.10 -celery==5.5.0b3 +celery==5.5.0rc1 certifi==2024.8.30 cffi==1.17.1 channels==4.1.0 channels-redis==4.2.0 chardet==5.2.0 -charset-normalizer==3.3.2 +charset-normalizer==3.4.0 click==8.1.7 click-didyoumean==0.3.1 click-plugins==1.1.1 click-repl==0.3.0 -cloudpathlib==0.19.0 +cloudpathlib==0.20.0 confection==0.1.5 constantly==23.10.4 cron-descriptor==1.4.5 -cryptography==43.0.1 +cryptography==43.0.3 cssselect==1.2.0 cymem==2.0.8 daphne==4.1.2 distro==1.9.0 -Django==5.1.1 +Django==5.1.3 django-celery-beat==2.7.0 django-environ==0.11.2 django-restframework==0.0.1 django-timezone-field==7.0 djangorestframework==3.15.2 -dnspython==2.6.1 +dnspython==2.7.0 docutils==0.21.2 drf-spectacular==0.27.2 -elastic-transport==8.15.0 +elastic-transport==8.15.1 elasticsearch==8.15.1 en-core-web-lg @ https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.7.1/en_core_web_lg-3.7.1-py3-none-any.whl#sha256=ab70aeb6172cde82508f7739f35ebc9918a3d07debeed637403c8f794ba3d3dc Events==0.5 filelock==3.16.1 -fsspec==2024.9.0 -google-api-core==2.20.0 -google-auth==2.35.0 -google-cloud-bigquery==3.25.0 +fsspec==2024.10.0 +google-api-core==2.22.0 +google-auth==2.36.0 +google-cloud-bigquery==3.26.0 google-cloud-core==2.4.1 google-crc32c==1.6.0 google-resumable-media==2.7.2 googleapis-common-protos==1.65.0 -grpcio==1.66.1 -grpcio-status==1.66.1 -grpcio-tools==1.66.1 +grpcio==1.67.1 +grpcio-status==1.67.1 +grpcio-tools==1.67.1 h11==0.14.0 h2==4.1.0 hpack==4.0.0 -httpcore==1.0.5 +httpcore==1.0.6 httpx==0.27.2 -huggingface-hub==0.25.0 +huggingface-hub==0.26.2 hyperframe==6.0.1 hyperlink==21.0.0 idna==3.10 incremental==24.7.2 inflection==0.5.1 Jinja2==3.1.4 -jiter==0.5.0 +jiter==0.7.0 joblib==1.4.2 -jsonpath-ng==1.6.1 +jsonpath-ng==1.7.0 jsonschema==4.23.0 -jsonschema-specifications==2023.12.1 +jsonschema-specifications==2024.10.1 kombu==5.4.2 -langcodes==3.4.0 +langcodes==3.4.1 language_data==1.2.0 lxml==5.3.0 -lxml_html_clean==0.2.2 -marisa-trie==1.2.0 +lxml_html_clean==0.3.1 +marisa-trie==1.2.1 markdown-it-py==3.0.0 -MarkupSafe==2.1.5 +MarkupSafe==3.0.2 mdurl==0.1.2 mpmath==1.3.0 msal==1.31.0 msgpack==1.1.0 murmurhash==1.0.10 natsort==8.4.0 -networkx==3.3 +networkx==3.4.2 nltk==3.9.1 numpy==1.26.4 -openai==1.46.1 +openai==1.54.3 opensearch-py==2.7.1 -oracledb==2.4.1 -packaging==24.1 +oracledb==2.5.0 +packaging==24.2 pandas==2.2.3 -phonenumbers==8.13.45 +phonenumbers==8.13.49 pika==1.3.2 pinecone-client==5.0.1 pinecone-plugin-inference==1.1.0 @@ -109,20 +108,20 @@ portalocker==2.10.1 preshed==3.0.9 presidio_analyzer==2.2.355 presidio_anonymizer==2.2.355 -prompt_toolkit==3.0.47 -proto-plus==1.24.0 -protobuf==5.28.2 -psycopg2-binary==2.9.9 +prompt_toolkit==3.0.48 +proto-plus==1.25.0 +protobuf==5.28.3 +psycopg2-binary==2.9.10 pyahocorasick==2.1.0 pyasn1==0.6.1 pyasn1_modules==0.4.1 pycparser==2.22 -pycryptodome==3.20.0 +pycryptodome==3.21.0 pydantic==2.9.2 pydantic_core==2.23.4 Pygments==2.18.0 
PyJWT==2.9.0 -pymongo==4.9.1 +pymongo==4.10.1 pyOpenSSL==24.2.1 python-crontab==3.2.0 python-dateutil==2.9.0.post0 @@ -130,22 +129,22 @@ pytz==2024.2 PyYAML==6.0.2 qdrant-client==1.10.0 readability-lxml==0.8.1 -redis==5.0.8 +redis==5.2.0 referencing==0.35.1 -regex==2024.9.11 +regex==2024.11.6 requests==2.32.3 requests-file==2.1.0 -rich==13.8.1 -rpds-py==0.20.0 +rich==13.9.4 +rpds-py==0.21.0 rsa==4.9 safetensors==0.4.5 -service-identity==24.1.0 -setuptools==75.1.0 +service-identity==24.2.0 +setuptools==75.3.0 shellingham==1.5.4 six==1.16.0 -smart-open==7.0.4 +smart-open==7.0.5 sniffio==1.3.1 -snowflake-connector-python==3.12.2 +snowflake-connector-python==3.12.3 sortedcontainers==2.4.0 soupsieve==2.6 spacy==3.7.5 @@ -154,29 +153,29 @@ spacy-loggers==1.0.5 sqlparse==0.5.1 srsly==2.4.8 statistics==1.0.3.5 -sympy==1.13.3 +sympy==1.13.1 textblob==0.18.0.post0 thinc==8.2.5 tika==2.6.0 -tiktoken==0.7.0 -tldextract==5.1.2 -tokenizers==0.19.1 +tiktoken==0.8.0 +tldextract==5.1.3 +tokenizers==0.20.3 tomlkit==0.13.2 -torch==2.4.1 -tqdm==4.66.5 -transformers==4.44.2 -Twisted==24.7.0 +torch==2.5.1 +tqdm==4.67.0 +transformers==4.46.2 +Twisted==24.10.0 txaio==23.1.1 -typer==0.12.5 +typer==0.13.0 typing_extensions==4.12.2 -tzdata==2024.1 +tzdata==2024.2 uritemplate==4.1.1 urllib3==2.2.3 vine==5.1.0 wasabi==1.1.3 wcwidth==0.2.13 weasel==0.4.1 -whitenoise==6.7.0 +whitenoise==6.8.2 wrapt==1.16.0 -xmltodict==0.13.0 -zope.interface==7.0.3 +xmltodict==0.14.2 +zope.interface==7.1.1 diff --git a/swirl/banner.py b/swirl/banner.py index e75103274..2b54a7344 100644 --- a/swirl/banner.py +++ b/swirl/banner.py @@ -10,9 +10,9 @@ class bcolors: ENDC = '\033[0m' BOLD = '\033[1m' -SWIRL_VERSION = '3.8.0.0' +SWIRL_VERSION = '3.9.0.0' -SWIRL_BANNER_TEXT = "__S_W_I_R_L__3_._8_._0_._0__________________________________________________________" +SWIRL_BANNER_TEXT = "__S_W_I_R_L__3_._9_._0_._0__________________________________________________________" SWIRL_BANNER = f'{bcolors.BOLD}{SWIRL_BANNER_TEXT}{bcolors.ENDC}' ############################################# diff --git a/swirl/connectors/elastic.py b/swirl/connectors/elastic.py index 4cd3134ab..3f5612dcc 100644 --- a/swirl/connectors/elastic.py +++ b/swirl/connectors/elastic.py @@ -67,9 +67,10 @@ def construct_query(self): self.error(f"elastic_query unexpectedly blank") self.query_to_provider = elastic_query + logger.debug(f"Constructed query_to_provider: {self.query_to_provider}") return - def execute_search(self, session=None): + def execute_search(self, size, session=None): logger.debug(f"{self}: execute_search()") @@ -129,10 +130,18 @@ def execute_search(self, session=None): else: self.status = "ERR_NO_QUERY_SPECIFIED" return + + # Extract size (int) - Optional + size_pattern = r"size=(\d+)" + match = re.search(size_pattern, self.query_to_provider) + if match: + size = int(match.group(1)) + else: + size = 10 # Default size if not specified response = None try: - response = es.search(index=index, query=query) + response = es.search(index=index, query=query, size=size) except ConnectionError as err: self.error(f"es.search reports: {err}") except NotFoundError: diff --git a/swirl/migrations/0001_initial.py b/swirl/migrations/0001_initial.py new file mode 100644 index 000000000..7c2f88642 --- /dev/null +++ b/swirl/migrations/0001_initial.py @@ -0,0 +1,143 @@ +# Generated by Django 5.1.1 on 2024-09-27 13:26 + +import django.db.models.deletion +import swirl.models +from django.conf import settings +from django.db import migrations, models + + +class 
Migration(migrations.Migration): + + initial = True + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name='Authenticator', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('name', models.CharField(max_length=100)), + ], + ), + migrations.CreateModel( + name='Search', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('date_created', models.DateTimeField(auto_now_add=True)), + ('date_updated', models.DateTimeField(auto_now=True)), + ('query_string', models.CharField(default=str, max_length=256)), + ('query_string_processed', models.CharField(blank=True, default=str, max_length=256)), + ('sort', models.CharField(blank=True, default='relevancy', max_length=50)), + ('results_requested', models.IntegerField(default=10)), + ('searchprovider_list', models.JSONField(blank=True, default=list)), + ('subscribe', models.BooleanField(default=False)), + ('status', models.CharField(default='NEW_SEARCH', max_length=50)), + ('time', models.FloatField(default=0.0)), + ('pre_query_processors', models.JSONField(blank=True, default=swirl.models.getSearchPreQueryProcessorsDefault)), + ('post_result_processors', models.JSONField(blank=True, default=swirl.models.getSearchPostResultProcessorsDefault)), + ('result_url', models.CharField(blank=True, default='/swirl/results?search_id=%d&result_mixer=%s', max_length=2048)), + ('new_result_url', models.CharField(blank=True, default='/swirl/results?search_id=%d&result_mixer=RelevancyNewItemsMixer', max_length=2048)), + ('messages', models.JSONField(blank=True, default=list)), + ('result_mixer', models.CharField(choices=[('DateMixer', 'DateMixer'), ('DateNewItemsMixer', 'DateNewItemsMixer'), ('RelevancyMixer', 'RelevancyMixer'), ('RelevancyNewItemsMixer', 'RelevancyNewItemsMixer'), ('RoundRobinMixer', 'RoundRobinMixer'), ('Stack1Mixer', 'Stack1Mixer'), ('Stack2Mixer', 'Stack2Mixer'), ('Stack3Mixer', 'Stack3Mixer'), ('StackNMixer', 'StackNMixer')], default='RelevancyMixer', max_length=200)), + ('retention', models.IntegerField(choices=[(0, 'Never expire'), (1, 'Expire after 1 hour'), (2, 'Expire after 1 day'), (3, 'Expire after 1 month')], default=0)), + ('tags', models.JSONField(default=list)), + ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'ordering': ['-date_updated'], + }, + ), + migrations.CreateModel( + name='Result', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('date_created', models.DateTimeField(auto_now_add=True)), + ('date_updated', models.DateTimeField(auto_now=True)), + ('provider_id', models.IntegerField(default=0)), + ('searchprovider', models.CharField(default=str, max_length=50)), + ('query_string_to_provider', models.CharField(default=str, max_length=256)), + ('result_processor_json_feedback', models.JSONField(default=list)), + ('query_to_provider', models.CharField(default=str, max_length=2048)), + ('query_processors', models.JSONField(blank=True, default=list)), + ('result_processors', models.JSONField(blank=True, default=list)), + ('messages', models.JSONField(blank=True, default=list)), + ('status', models.CharField(default=str, max_length=20)), + ('retrieved', models.IntegerField(default=0)), + ('found', models.IntegerField(default=0)), + ('time', models.FloatField(default=0.0)), + ('json_results', models.JSONField(default=list)), + ('tags', 
models.JSONField(default=list)), + ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ('search_id', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='swirl.search')), + ], + options={ + 'ordering': ['-date_updated'], + }, + ), + migrations.CreateModel( + name='SearchProvider', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('name', models.CharField(max_length=200)), + ('shared', models.BooleanField(default=False)), + ('date_created', models.DateTimeField(auto_now_add=True)), + ('date_updated', models.DateTimeField(auto_now=True)), + ('active', models.BooleanField(default=True)), + ('default', models.BooleanField(default=True)), + ('authenticator', models.CharField(blank=True, choices=[('Microsoft', 'Microsoft Authentication')], default='', max_length=200)), + ('connector', models.CharField(choices=[('ChatGPT', 'ChatGPT Query String'), ('GenAI', 'Generative AI'), ('RequestsGet', 'HTTP/GET returning JSON'), ('RequestsPost', 'HTTP/POST returning JSON'), ('Elastic', 'Elasticsearch Query String'), ('OpenSearch', 'OpenSearch Query String'), ('QdrantDB', 'QdrantDB'), ('BigQuery', 'Google BigQuery'), ('Sqlite3', 'Sqlite3'), ('M365OutlookMessages', 'M365 Outlook Messages'), ('M365OneDrive', 'M365 One Drive'), ('M365OutlookCalendar', 'M365 Outlook Calendar'), ('M365SharePointSites', 'M365 SharePoint Sites'), ('MicrosoftTeams', 'Microsoft Teams'), ('MongoDB', 'MongoDB'), ('Oracle', 'Oracle'), ('Snowflake', 'Snowflake'), ('PineconeDB', 'PineconeDB')], default='RequestsGet', max_length=200)), + ('url', models.CharField(blank=True, default=str, max_length=2048)), + ('query_template', models.CharField(blank=True, default='{url}?q={query_string}', max_length=2048)), + ('query_template_json', models.JSONField(blank=True, default={})), + ('post_query_template', models.JSONField(blank=True, default={})), + ('query_processors', models.JSONField(blank=True, default=swirl.models.getSearchProviderQueryProcessorsDefault)), + ('query_mappings', models.CharField(blank=True, default=str, max_length=2048)), + ('response_mappings', models.CharField(blank=True, default=str, max_length=2048)), + ('result_grouping_field', models.CharField(blank=True, default=str, max_length=1024)), + ('result_processors', models.JSONField(blank=True, default=swirl.models.getSearchProviderResultProcessorsDefault)), + ('result_mappings', models.CharField(blank=True, default=str, max_length=2048)), + ('results_per_query', models.IntegerField(default=10)), + ('eval_credentials', models.CharField(blank=True, default=str, max_length=100)), + ('credentials', models.CharField(blank=True, default=str, max_length=512)), + ('tags', models.JSONField(default=list)), + ('http_request_headers', models.JSONField(blank=True, default={})), + ('page_fetch_config_json', models.JSONField(blank=True, default={})), + ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'ordering': ['id'], + }, + ), + migrations.CreateModel( + name='OauthToken', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('idp', models.CharField(default='Microsoft', max_length=32)), + ('token', models.CharField(max_length=4096)), + ('refresh_token', models.CharField(blank=True, max_length=4096, null=True)), + ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'unique_together': 
{('owner', 'idp')}, + }, + ), + migrations.CreateModel( + name='QueryTransform', + fields=[ + ('id', models.BigAutoField(primary_key=True, serialize=False)), + ('name', models.CharField(max_length=255)), + ('shared', models.BooleanField(default=False)), + ('date_created', models.DateTimeField(auto_now_add=True)), + ('date_updated', models.DateTimeField(auto_now=True)), + ('qrx_type', models.CharField(choices=[('rewrite', 'Rewrite'), ('synonym', 'Synonym'), ('bag', 'Synonym Bag')], default='rewrite', max_length=64)), + ('config_content', models.TextField()), + ('owner', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)), + ], + options={ + 'unique_together': {('name', 'qrx_type')}, + }, + ), + ] diff --git a/swirl/migrations/0002_alter_result_query_string_to_provider_and_more.py b/swirl/migrations/0002_alter_result_query_string_to_provider_and_more.py new file mode 100644 index 000000000..c8cdcbf6b --- /dev/null +++ b/swirl/migrations/0002_alter_result_query_string_to_provider_and_more.py @@ -0,0 +1,28 @@ +# Generated by Django 5.1.1 on 2024-10-16 15:04 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('swirl', '0001_initial'), + ] + + operations = [ + migrations.AlterField( + model_name='result', + name='query_string_to_provider', + field=models.CharField(default=str, max_length=2048), + ), + migrations.AlterField( + model_name='search', + name='query_string', + field=models.CharField(default=str, max_length=2048), + ), + migrations.AlterField( + model_name='search', + name='query_string_processed', + field=models.CharField(blank=True, default=str, max_length=2048), + ), + ] diff --git a/swirl/migrations/__init__.py b/swirl/migrations/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/swirl/models.py b/swirl/models.py index 5cf16137f..fde2cb45e 100644 --- a/swirl/models.py +++ b/swirl/models.py @@ -12,6 +12,7 @@ def getSearchProviderQueryProcessorsDefault(): def getSearchProviderResultProcessorsDefault(): return ["MappingResultProcessor","DateFinderResultProcessor","CosineRelevancyResultProcessor"] +MAX_QUERY_STRING_LENGTH = 2048 class FlexibleChoiceField(models.CharField): """ Allow choices and free text so we can have a user named and shared query transform @@ -154,8 +155,8 @@ class Search(models.Model): owner = models.ForeignKey('auth.User', on_delete=models.CASCADE) date_created = models.DateTimeField(auto_now_add=True) date_updated = models.DateTimeField(auto_now=True) - query_string = models.CharField(max_length=256, default=str) - query_string_processed = models.CharField(max_length=256, default=str, blank=True) + query_string = models.CharField(max_length=MAX_QUERY_STRING_LENGTH, default=str) + query_string_processed = models.CharField(max_length=MAX_QUERY_STRING_LENGTH, default=str, blank=True) SORT_CHOICES = [ ('relevancy', 'relevancy'), ('date', 'date') @@ -225,9 +226,9 @@ class Result(models.Model): search_id = models.ForeignKey(Search, on_delete=models.CASCADE) provider_id = models.IntegerField(default=0) searchprovider = models.CharField(max_length=50, default=str) - query_string_to_provider = models.CharField(max_length=256, default=str) + query_string_to_provider = models.CharField(max_length=MAX_QUERY_STRING_LENGTH, default=str) result_processor_json_feedback = models.JSONField(default=list) - query_to_provider = models.CharField(max_length=2048, default=str) + query_to_provider = models.CharField(max_length=MAX_QUERY_STRING_LENGTH, default=str) 
query_processors = models.JSONField(default=list, blank=True) result_processors = models.JSONField(default=list, blank=True) messages = models.JSONField(default=list, blank=True)
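
The QA workflows above add a "Decode BigQuery token and create JSON file" step that pipes the QA_BIGQUERY_TOKEN_BASE64 secret through `base64 --decode` into token.json and then points BIGQUERY_TOKEN_PATH at that file. The snippet below is a hedged sketch of the reverse operation, i.e. one way the secret value could be produced from a service-account JSON file; the helper name and file name are illustrative only and are not part of the repository:

import base64
import json
import pathlib

def encode_service_account(path: str) -> str:
    """Base64-encode a service-account JSON file for storage as a GitHub Actions secret."""
    raw = pathlib.Path(path).read_bytes()
    json.loads(raw)  # fail fast if the file is not valid JSON
    return base64.b64encode(raw).decode("ascii")

# Hypothetical usage; the file name is an assumption:
# print(encode_service_account("bigquery-service-account.json"))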
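The new Company Data - BigQuery provider defines its SQL through query_template plus the substitutions listed in query_mappings. As a rough preview only (Swirl performs this expansion internally; the format() call below merely approximates that step), the template resolves to a SEARCH-based query against the company_dataset.company table:

template = (
    "select {fields} from `{table}` where search({field1}, '{query_string}') "
    "or search({field2}, '{query_string}') or search({field3}, '{query_string}');"
)
mappings = {
    "fields": "*",
    "table": "company_dataset.company",
    "field1": "name",
    "field2": "domain",
    "field3": "locality",
}
print(template.format(query_string="swirl", **mappings))
# select * from `company_dataset.company` where search(name, 'swirl')
#   or search(domain, 'swirl') or search(locality, 'swirl');   (output wrapped for readability)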
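A minimal standalone sketch of the page-size parsing added to swirl/connectors/elastic.py above: the connector now looks for an optional size=N token in the query sent to the provider and falls back to 10 results when none is present. The helper function here is illustrative; the actual change applies this logic inside execute_search() before calling es.search(index=index, query=query, size=size):

import re

def extract_size(query_to_provider: str, default: int = 10) -> int:
    """Return the value of an optional size=N directive, or the default page size."""
    match = re.search(r"size=(\d+)", query_to_provider)
    return int(match.group(1)) if match else default

assert extract_size("subject:report size=25") == 25
assert extract_size("subject:report") == 10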