From 3d59d3eddba5d801395f4f71e605fc4f339c31d1 Mon Sep 17 00:00:00 2001 From: Karen Shaw Date: Wed, 21 Jun 2023 16:44:43 +0000 Subject: [PATCH 1/6] Create prototype deployment --- .github/workflows/build.yml | 47 ------ .github/workflows/deploy.yml | 82 ++-------- .github/workflows/docs.yml | 53 +++++++ .github/workflows/test-node.yml | 30 ++++ .github/workflows/test-python.yml | 29 ++++ .github/workflows/validate-template.yml | 25 +++ .gitignore | 8 + .husky/pre-commit | 2 +- .tool-versions | 1 + .mocharc.js => node/.mocharc.js | 0 .npmignore => node/.npmignore | 0 nyc.config.js => node/nyc.config.js | 0 package-lock.json => node/package-lock.json | 0 package.json => node/package.json | 2 +- {redirect => node/redirect}/index.js | 0 {src => node/src}/api/api-token.js | 0 {src => node/src}/api/opensearch.js | 0 {src => node/src}/api/pagination.js | 0 {src => node/src}/api/request/models.js | 0 {src => node/src}/api/request/pipeline.js | 0 {src => node/src}/api/response/error.js | 0 .../src}/api/response/iiif/collection.js | 0 .../src}/api/response/iiif/manifest.js | 0 .../response/iiif/presentation-api/items.js | 0 .../iiif/presentation-api/metadata.js | 0 .../presentation-api/placeholder-canvas.js | 0 .../src}/api/response/opensearch/index.js | 0 {src => node/src}/api/response/transformer.js | 0 {src => node/src}/aws/fetch.js | 0 {src => node/src}/environment.js | 0 .../src}/handlers/authorize-document.js | 0 {src => node/src}/handlers/default-request.js | 0 .../src}/handlers/get-auth-callback.js | 0 {src => node/src}/handlers/get-auth-login.js | 0 {src => node/src}/handlers/get-auth-logout.js | 0 {src => node/src}/handlers/get-auth-whoami.js | 0 .../src}/handlers/get-collection-by-id.js | 0 {src => node/src}/handlers/get-collections.js | 0 .../src}/handlers/get-file-set-auth.js | 0 .../src}/handlers/get-file-set-by-id.js | 0 .../src}/handlers/get-file-set-download.js | 0 .../src}/handlers/get-shared-link-by-id.js | 0 {src => node/src}/handlers/get-similar.js | 0 {src => node/src}/handlers/get-thumbnail.js | 0 {src => node/src}/handlers/get-work-auth.js | 0 {src => node/src}/handlers/get-work-by-id.js | 0 {src => node/src}/handlers/middleware.js | 0 {src => node/src}/handlers/oai.js | 0 {src => node/src}/handlers/oai/search.js | 0 {src => node/src}/handlers/oai/verbs.js | 0 .../src}/handlers/oai/xml-transformer.js | 0 {src => node/src}/handlers/options-request.js | 0 {src => node/src}/handlers/search-runner.js | 0 {src => node/src}/handlers/search.js | 0 .../src}/handlers/transcode-templates.js | 0 {src => node/src}/helpers.js | 0 {src => node/src}/honeybadger-setup.js | 0 {src => node/src}/package-lock.json | 0 {src => node/src}/package.json | 0 {test => node/test}/.eslintrc | 0 .../mocks/collection-1234-no-thumbnail.json | 0 .../collection-1234-private-published.json | 0 .../test}/fixtures/mocks/collection-1234.json | 0 .../test}/fixtures/mocks/collections.json | 0 .../mocks/expired-shared-link-9101112.json | 0 .../test}/fixtures/mocks/fileset-1234.json | 0 .../fixtures/mocks/fileset-baddata-1234.json | 0 .../fixtures/mocks/fileset-netid-1234.json | 0 .../mocks/fileset-restricted-1234.json | 0 .../fileset-restricted-unpublished-1234.json | 0 .../mocks/fileset-unpublished-1234.json | 0 .../mocks/missing-collection-1234.json | 0 .../fixtures/mocks/missing-fileset-1234.json | 0 .../test}/fixtures/mocks/missing-index.json | 0 .../mocks/missing-shared-link-5678.json | 0 .../fixtures/mocks/missing-work-1234.json | 0 .../mocks/oai-list-identifiers-sets.json | 0 .../test}/fixtures/mocks/oai-sets.json | 0 .../mocks/private-unpublished-work-1234.json | 0 .../fixtures/mocks/private-work-1234.json | 0 .../fixtures/mocks/real-search-event.json | 0 .../test}/fixtures/mocks/scroll-empty.json | 0 .../test}/fixtures/mocks/scroll-missing.json | 0 .../test}/fixtures/mocks/scroll.json | 0 .../mocks/search-earliest-record.json | 0 .../mocks/search-multiple-targets.json | 0 .../test}/fixtures/mocks/search.json | 0 .../fixtures/mocks/shared-link-1234.json | 0 .../test}/fixtures/mocks/similar.json | 0 .../test}/fixtures/mocks/thumbnail_full.jpg | Bin .../test}/fixtures/mocks/thumbnail_square.jpg | Bin .../fixtures/mocks/unpublished-work-1234.json | 0 .../mocks/work-1234-no-collection.json | 0 .../work-1234-no-fileset-width-height.json | 0 .../mocks/work-1234-no-thumbnail.json | 0 .../test}/fixtures/mocks/work-1234.json | 0 .../test}/fixtures/mocks/work-netid-1234.json | 0 .../fixtures/mocks/work-restricted-1234.json | 0 .../work-restricted-unpublished-1234.json | 0 .../test}/fixtures/mocks/work-video-5678.json | 0 .../test}/integration/default-handler.test.js | 0 .../integration/get-auth-callback.test.js | 0 .../test}/integration/get-auth-login.test.js | 0 .../test}/integration/get-auth-logout.test.js | 0 .../test}/integration/get-auth-whoami.test.js | 0 .../integration/get-collection-by-id.test.js | 0 .../test}/integration/get-collections.test.js | 0 .../test}/integration/get-doc.test.js | 0 .../integration/get-file-set-auth.test.js | 0 .../integration/get-file-set-download.test.js | 0 .../integration/get-shared-link-by-id.test.js | 0 .../test}/integration/get-similar.test.js | 0 .../test}/integration/get-thumbnail.test.js | 0 .../test}/integration/get-work-auth.test.js | 0 .../test}/integration/get-work-by-id.test.js | 0 .../test}/integration/middleware.test.js | 0 {test => node/test}/integration/oai.test.js | 0 .../test}/integration/options-request.test.js | 0 .../test}/integration/search.test.js | 0 .../test}/test-helpers/event-builder.js | 0 {test => node/test}/test-helpers/index.js | 0 .../test}/unit/api/api-token.test.js | 0 {test => node/test}/unit/api/helpers.test.js | 0 .../test}/unit/api/opensearch.test.js | 0 .../test}/unit/api/pagination.test.js | 0 .../test}/unit/api/request/models.test.js | 0 .../test}/unit/api/request/pipeline.test.js | 0 .../test}/unit/api/response/error.test.js | 0 .../unit/api/response/iiif/collection.test.js | 0 .../unit/api/response/iiif/manifest.test.js | 0 .../iiif/presentation-api/items.test.js | 0 .../iiif/presentation-api/metadata.test.js | 0 .../placeholder-canvas.test.js | 0 .../unit/api/response/opensearch.test.js | 0 .../test}/unit/aws/environment.test.js | 0 {test => node/test}/unit/package.test.js | 0 {test => node/test}/unit/redirect.test.js | 0 python/requirements.txt | 2 + python/src/handlers/hello.py | 11 ++ python/src/handlers/hello.test.py | 14 ++ python/src/requirements.txt | 6 + template.yaml | 150 ++++++++++-------- 142 files changed, 278 insertions(+), 184 deletions(-) delete mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/docs.yml create mode 100644 .github/workflows/test-node.yml create mode 100644 .github/workflows/test-python.yml create mode 100644 .github/workflows/validate-template.yml rename .mocharc.js => node/.mocharc.js (100%) rename .npmignore => node/.npmignore (100%) rename nyc.config.js => node/nyc.config.js (100%) rename package-lock.json => node/package-lock.json (100%) rename package.json => node/package.json (93%) rename {redirect => node/redirect}/index.js (100%) rename {src => node/src}/api/api-token.js (100%) rename {src => node/src}/api/opensearch.js (100%) rename {src => node/src}/api/pagination.js (100%) rename {src => node/src}/api/request/models.js (100%) rename {src => node/src}/api/request/pipeline.js (100%) rename {src => node/src}/api/response/error.js (100%) rename {src => node/src}/api/response/iiif/collection.js (100%) rename {src => node/src}/api/response/iiif/manifest.js (100%) rename {src => node/src}/api/response/iiif/presentation-api/items.js (100%) rename {src => node/src}/api/response/iiif/presentation-api/metadata.js (100%) rename {src => node/src}/api/response/iiif/presentation-api/placeholder-canvas.js (100%) rename {src => node/src}/api/response/opensearch/index.js (100%) rename {src => node/src}/api/response/transformer.js (100%) rename {src => node/src}/aws/fetch.js (100%) rename {src => node/src}/environment.js (100%) rename {src => node/src}/handlers/authorize-document.js (100%) rename {src => node/src}/handlers/default-request.js (100%) rename {src => node/src}/handlers/get-auth-callback.js (100%) rename {src => node/src}/handlers/get-auth-login.js (100%) rename {src => node/src}/handlers/get-auth-logout.js (100%) rename {src => node/src}/handlers/get-auth-whoami.js (100%) rename {src => node/src}/handlers/get-collection-by-id.js (100%) rename {src => node/src}/handlers/get-collections.js (100%) rename {src => node/src}/handlers/get-file-set-auth.js (100%) rename {src => node/src}/handlers/get-file-set-by-id.js (100%) rename {src => node/src}/handlers/get-file-set-download.js (100%) rename {src => node/src}/handlers/get-shared-link-by-id.js (100%) rename {src => node/src}/handlers/get-similar.js (100%) rename {src => node/src}/handlers/get-thumbnail.js (100%) rename {src => node/src}/handlers/get-work-auth.js (100%) rename {src => node/src}/handlers/get-work-by-id.js (100%) rename {src => node/src}/handlers/middleware.js (100%) rename {src => node/src}/handlers/oai.js (100%) rename {src => node/src}/handlers/oai/search.js (100%) rename {src => node/src}/handlers/oai/verbs.js (100%) rename {src => node/src}/handlers/oai/xml-transformer.js (100%) rename {src => node/src}/handlers/options-request.js (100%) rename {src => node/src}/handlers/search-runner.js (100%) rename {src => node/src}/handlers/search.js (100%) rename {src => node/src}/handlers/transcode-templates.js (100%) rename {src => node/src}/helpers.js (100%) rename {src => node/src}/honeybadger-setup.js (100%) rename {src => node/src}/package-lock.json (100%) rename {src => node/src}/package.json (100%) rename {test => node/test}/.eslintrc (100%) rename {test => node/test}/fixtures/mocks/collection-1234-no-thumbnail.json (100%) rename {test => node/test}/fixtures/mocks/collection-1234-private-published.json (100%) rename {test => node/test}/fixtures/mocks/collection-1234.json (100%) rename {test => node/test}/fixtures/mocks/collections.json (100%) rename {test => node/test}/fixtures/mocks/expired-shared-link-9101112.json (100%) rename {test => node/test}/fixtures/mocks/fileset-1234.json (100%) rename {test => node/test}/fixtures/mocks/fileset-baddata-1234.json (100%) rename {test => node/test}/fixtures/mocks/fileset-netid-1234.json (100%) rename {test => node/test}/fixtures/mocks/fileset-restricted-1234.json (100%) rename {test => node/test}/fixtures/mocks/fileset-restricted-unpublished-1234.json (100%) rename {test => node/test}/fixtures/mocks/fileset-unpublished-1234.json (100%) rename {test => node/test}/fixtures/mocks/missing-collection-1234.json (100%) rename {test => node/test}/fixtures/mocks/missing-fileset-1234.json (100%) rename {test => node/test}/fixtures/mocks/missing-index.json (100%) rename {test => node/test}/fixtures/mocks/missing-shared-link-5678.json (100%) rename {test => node/test}/fixtures/mocks/missing-work-1234.json (100%) rename {test => node/test}/fixtures/mocks/oai-list-identifiers-sets.json (100%) rename {test => node/test}/fixtures/mocks/oai-sets.json (100%) rename {test => node/test}/fixtures/mocks/private-unpublished-work-1234.json (100%) rename {test => node/test}/fixtures/mocks/private-work-1234.json (100%) rename {test => node/test}/fixtures/mocks/real-search-event.json (100%) rename {test => node/test}/fixtures/mocks/scroll-empty.json (100%) rename {test => node/test}/fixtures/mocks/scroll-missing.json (100%) rename {test => node/test}/fixtures/mocks/scroll.json (100%) rename {test => node/test}/fixtures/mocks/search-earliest-record.json (100%) rename {test => node/test}/fixtures/mocks/search-multiple-targets.json (100%) rename {test => node/test}/fixtures/mocks/search.json (100%) rename {test => node/test}/fixtures/mocks/shared-link-1234.json (100%) rename {test => node/test}/fixtures/mocks/similar.json (100%) rename {test => node/test}/fixtures/mocks/thumbnail_full.jpg (100%) rename {test => node/test}/fixtures/mocks/thumbnail_square.jpg (100%) rename {test => node/test}/fixtures/mocks/unpublished-work-1234.json (100%) rename {test => node/test}/fixtures/mocks/work-1234-no-collection.json (100%) rename {test => node/test}/fixtures/mocks/work-1234-no-fileset-width-height.json (100%) rename {test => node/test}/fixtures/mocks/work-1234-no-thumbnail.json (100%) rename {test => node/test}/fixtures/mocks/work-1234.json (100%) rename {test => node/test}/fixtures/mocks/work-netid-1234.json (100%) rename {test => node/test}/fixtures/mocks/work-restricted-1234.json (100%) rename {test => node/test}/fixtures/mocks/work-restricted-unpublished-1234.json (100%) rename {test => node/test}/fixtures/mocks/work-video-5678.json (100%) rename {test => node/test}/integration/default-handler.test.js (100%) rename {test => node/test}/integration/get-auth-callback.test.js (100%) rename {test => node/test}/integration/get-auth-login.test.js (100%) rename {test => node/test}/integration/get-auth-logout.test.js (100%) rename {test => node/test}/integration/get-auth-whoami.test.js (100%) rename {test => node/test}/integration/get-collection-by-id.test.js (100%) rename {test => node/test}/integration/get-collections.test.js (100%) rename {test => node/test}/integration/get-doc.test.js (100%) rename {test => node/test}/integration/get-file-set-auth.test.js (100%) rename {test => node/test}/integration/get-file-set-download.test.js (100%) rename {test => node/test}/integration/get-shared-link-by-id.test.js (100%) rename {test => node/test}/integration/get-similar.test.js (100%) rename {test => node/test}/integration/get-thumbnail.test.js (100%) rename {test => node/test}/integration/get-work-auth.test.js (100%) rename {test => node/test}/integration/get-work-by-id.test.js (100%) rename {test => node/test}/integration/middleware.test.js (100%) rename {test => node/test}/integration/oai.test.js (100%) rename {test => node/test}/integration/options-request.test.js (100%) rename {test => node/test}/integration/search.test.js (100%) rename {test => node/test}/test-helpers/event-builder.js (100%) rename {test => node/test}/test-helpers/index.js (100%) rename {test => node/test}/unit/api/api-token.test.js (100%) rename {test => node/test}/unit/api/helpers.test.js (100%) rename {test => node/test}/unit/api/opensearch.test.js (100%) rename {test => node/test}/unit/api/pagination.test.js (100%) rename {test => node/test}/unit/api/request/models.test.js (100%) rename {test => node/test}/unit/api/request/pipeline.test.js (100%) rename {test => node/test}/unit/api/response/error.test.js (100%) rename {test => node/test}/unit/api/response/iiif/collection.test.js (100%) rename {test => node/test}/unit/api/response/iiif/manifest.test.js (100%) rename {test => node/test}/unit/api/response/iiif/presentation-api/items.test.js (100%) rename {test => node/test}/unit/api/response/iiif/presentation-api/metadata.test.js (100%) rename {test => node/test}/unit/api/response/iiif/presentation-api/placeholder-canvas.test.js (100%) rename {test => node/test}/unit/api/response/opensearch.test.js (100%) rename {test => node/test}/unit/aws/environment.test.js (100%) rename {test => node/test}/unit/package.test.js (100%) rename {test => node/test}/unit/redirect.test.js (100%) create mode 100644 python/requirements.txt create mode 100644 python/src/handlers/hello.py create mode 100644 python/src/handlers/hello.test.py create mode 100644 python/src/requirements.txt diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 92be155a..00000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: Run Tests -on: - push: - paths: - - ".github/workflows/build.yml" - - "package.json" - - "package-lock.json" - - "src/**" - - "test/**" - - "template.yaml" - workflow_dispatch: -jobs: - test: - runs-on: ubuntu-latest - env: - AWS_ACCESS_KEY_ID: ci - AWS_SECRET_ACCESS_KEY: ci - steps: - - uses: actions/checkout@v3 - - uses: actions/setup-node@v3 - with: - node-version: 16.x - cache: "npm" - - run: npm ci - - name: Check code style - run: npm run lint && npm run prettier - - name: Run tests - run: npm run test:coverage - - name: Validate OpenAPI spec - run: npm run validate-spec - validate-template: - runs-on: ubuntu-latest - permissions: - id-token: write - contents: read - environment: test - steps: - - uses: aws-actions/setup-sam@v1 - - name: sam fix https://github.com/aws/aws-sam-cli/issues/4527 - run: $(dirname $(readlink $(which sam)))/pip install --force-reinstall "cryptography==38.0.4" - - uses: aws-actions/configure-aws-credentials@master - with: - role-to-assume: arn:aws:iam::${{ secrets.AwsAccount }}:role/github-actions-role - aws-region: us-east-1 - - uses: actions/checkout@v3 - - name: Validate template - run: sam build && sam validate diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index b8d2ea62..c9e262c9 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -2,24 +2,16 @@ name: Deploy on: push: branches: - - deploy/staging + - deploy/* - main paths: - ".github/workflows/deploy.yml" - - "package.json" - - "package-lock.json" - - "src/**" + - "node/*" + - "python/*" - "template.yaml" workflow_dispatch: - inputs: - force_deploy_docs: - description: Deploy documentation even if no changes detected - type: boolean - default: false concurrency: group: ${{ github.workflow }}-${{ github.ref }} -env: - CONFIG_ENV: ${{ github.ref == 'refs/heads/main' && 'production' || 'staging' }} jobs: build-deploy: runs-on: ubuntu-latest @@ -28,6 +20,17 @@ jobs: contents: read environment: ${{ github.ref == 'refs/heads/main' && 'production' || 'staging' }} steps: + - name: Set CONFIG_ENV from Branch Name + run: | + if [[ $BRANCH == 'refs/heads/main' ]]; then + echo "CONFIG_ENV=production" >> $GITHUB_ENV + else + echo "CONFIG_ENV=$(echo $BRANCH | awk -F/ '{print $NF}')" >> $GITHUB_ENV + fi + env: + BRANCH: ${{ github.ref }} + - name: Confirm deploy environment + run: echo "Deploying to '$CONFIG_ENV' environment" - name: Set GitHub Deploy Key uses: webfactory/ssh-agent@v0.5.3 with: @@ -59,60 +62,3 @@ jobs: | sed 's/\(Parameter overrides\s*\): .*/\1: ***** REDACTED *****/' env: HONEYBADGER_REVISION: ${{ github.sha }} - docs-changed: - runs-on: ubuntu-latest - outputs: - result: ${{ steps.changed-files.outputs.any_modified == 'true' }} - steps: - - uses: actions/checkout@v2 - with: - fetch-depth: 2 - - name: Get changed doc files - id: changed-files - uses: tj-actions/changed-files@v29.0.2 - with: - files: | - .github/workflows/deploy.yaml - docs/* - publish-docs: - needs: docs-changed - if: ${{ needs.docs-changed.outputs.result == 'true' || inputs.force_deploy_docs }} - runs-on: ubuntu-latest - permissions: - id-token: write - contents: read - environment: ${{ github.ref == 'refs/heads/main' && 'production' || 'staging' }} - steps: - - name: Configure AWS Credentials - uses: aws-actions/configure-aws-credentials@master - with: - role-to-assume: arn:aws:iam::${{ secrets.AwsAccount }}:role/github-actions-role - aws-region: us-east-1 - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - uses: abatilo/actions-poetry@v2 - with: - poetry-version: 1.4.2 - - name: Install dependencies - run: poetry install - working-directory: ./docs - - name: Build docs - run: poetry run mkdocs build --clean - working-directory: ./docs - - name: Determine correct deploy domain for environment - run: sed -i s/API_HOST/${HOSTNAME}/g docs/site/spec/openapi.* - env: - HOSTNAME: ${{ secrets.Hostname }}.${{ secrets.HostedZone }} - - name: Generate JSON API - uses: openapi-generators/openapitools-generator-action@v1 - with: - generator: openapi - openapi-file: docs/site/spec/openapi.yaml - command-args: -o docs/site/spec - - name: Copy to S3 - run: aws s3 sync --delete docs/site/ s3://${HOST}-docs.${ZONE}/ - env: - HOST: ${{ secrets.Hostname }} - ZONE: ${{ secrets.HostedZone }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 00000000..5cdee7e1 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,53 @@ +name: Build and deploy API documentation +on: + push: + branches: + - deploy/staging + - main + paths: + - .github/workflows/docs.yaml + - docs/* + workflow_dispatch: +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} +jobs: + publish-docs: + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + environment: ${{ github.ref == 'refs/heads/main' && 'production' || 'staging' }} + steps: + - name: Configure AWS Credentials + uses: aws-actions/configure-aws-credentials@master + with: + role-to-assume: arn:aws:iam::${{ secrets.AwsAccount }}:role/github-actions-role + aws-region: us-east-1 + - uses: actions/checkout@v2 + - uses: actions/setup-python@v2 + with: + python-version: 3.9 + - uses: abatilo/actions-poetry@v2 + with: + poetry-version: 1.4.2 + - name: Install dependencies + run: poetry install + working-directory: ./docs + - name: Build docs + run: poetry run mkdocs build --clean + working-directory: ./docs + - name: Determine correct deploy domain for environment + run: sed -i s/API_HOST/${HOSTNAME}/g docs/site/spec/openapi.* + env: + HOSTNAME: ${{ secrets.Hostname }}.${{ secrets.HostedZone }} + - name: Generate JSON API + uses: openapi-generators/openapitools-generator-action@v1 + with: + generator: openapi + openapi-file: docs/site/spec/openapi.yaml + command-args: -o docs/site/spec + - name: Copy to S3 + run: aws s3 sync --delete docs/site/ s3://${HOST}-docs.${ZONE}/ + env: + HOST: ${{ secrets.Hostname }} + ZONE: ${{ secrets.HostedZone }} diff --git a/.github/workflows/test-node.yml b/.github/workflows/test-node.yml new file mode 100644 index 00000000..7c42ea5d --- /dev/null +++ b/.github/workflows/test-node.yml @@ -0,0 +1,30 @@ +name: Run NodeJS Tests +on: + push: + paths: + - ".github/workflows/test-node.yml" + - "node/**" + workflow_dispatch: +defaults: + run: + working-directory: ./node +jobs: + test: + runs-on: ubuntu-latest + env: + AWS_ACCESS_KEY_ID: ci + AWS_SECRET_ACCESS_KEY: ci + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-node@v3 + with: + node-version: 16.x + cache: "npm" + cache-dependency-path: 'node/package-lock.json' + - run: npm ci + - name: Check code style + run: npm run lint && npm run prettier + - name: Run tests + run: npm run test:coverage + - name: Validate OpenAPI spec + run: npm run validate-spec diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml new file mode 100644 index 00000000..bfc7a8ac --- /dev/null +++ b/.github/workflows/test-python.yml @@ -0,0 +1,29 @@ +name: Run Python Tests +on: + push: + paths: + - ".github/workflows/test-python.yml" + - "python/**" + workflow_dispatch: +defaults: + run: + working-directory: ./python +jobs: + test: + runs-on: ubuntu-latest + env: + AWS_ACCESS_KEY_ID: ci + AWS_SECRET_ACCESS_KEY: ci + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + cache-dependency-path: python/requirements.txt + - run: npm ci + - name: Check code style + run: npm run lint && npm run prettier + - name: Run tests + run: npm run test:coverage + - name: Validate OpenAPI spec + run: npm run validate-spec diff --git a/.github/workflows/validate-template.yml b/.github/workflows/validate-template.yml new file mode 100644 index 00000000..7625850a --- /dev/null +++ b/.github/workflows/validate-template.yml @@ -0,0 +1,25 @@ +name: Validate Template +on: + push: + paths: + - ".github/workflows/validate-template.yml" + - "./template.yml" + workflow_dispatch: +jobs: + validate-template: + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + environment: test + steps: + - uses: aws-actions/setup-sam@v1 + - name: sam fix https://github.com/aws/aws-sam-cli/issues/4527 + run: $(dirname $(readlink $(which sam)))/pip install --force-reinstall "cryptography==38.0.4" + - uses: aws-actions/configure-aws-credentials@master + with: + role-to-assume: arn:aws:iam::${{ secrets.AwsAccount }}:role/github-actions-role + aws-region: us-east-1 + - uses: actions/checkout@v3 + - name: Validate template + run: sam build && sam validate \ No newline at end of file diff --git a/.gitignore b/.gitignore index abfc23df..50cd1c55 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,14 @@ yarn-debug.log* yarn-error.log* lerna-debug.log* +### Python ### +__pycache__/ +*.py[cod] +*$py.class +pip-log.txt +pip-delete-this-directory.txt +*.py,cover + # Diagnostic reports (https://nodejs.org/api/report.html) report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json diff --git a/.husky/pre-commit b/.husky/pre-commit index 18193ffb..66741865 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1,4 +1,4 @@ #!/usr/bin/env sh . "$(dirname -- "$0")/_/husky.sh" - +cd node npm run lint && npm run prettier diff --git a/.tool-versions b/.tool-versions index 92427f10..4634f4c6 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1,3 +1,4 @@ nodejs 16.14.0 java corretto-19.0.1.10.1 aws-sam-cli 1.107.0 +python 3.10.5 diff --git a/.mocharc.js b/node/.mocharc.js similarity index 100% rename from .mocharc.js rename to node/.mocharc.js diff --git a/.npmignore b/node/.npmignore similarity index 100% rename from .npmignore rename to node/.npmignore diff --git a/nyc.config.js b/node/nyc.config.js similarity index 100% rename from nyc.config.js rename to node/nyc.config.js diff --git a/package-lock.json b/node/package-lock.json similarity index 100% rename from package-lock.json rename to node/package-lock.json diff --git a/package.json b/node/package.json similarity index 93% rename from package.json rename to node/package.json index 391e8d08..8e15f511 100644 --- a/package.json +++ b/node/package.json @@ -15,7 +15,7 @@ "prettier:fix": "prettier -cw src test", "test": "mocha", "test:coverage": "nyc npm test", - "validate-spec": "openapi-generator-cli validate -i ./docs/docs/spec/openapi.yaml" + "validate-spec": "openapi-generator-cli validate -i ../docs/docs/spec/openapi.yaml" }, "devDependencies": { "@openapitools/openapi-generator-cli": "^2.5.2", diff --git a/redirect/index.js b/node/redirect/index.js similarity index 100% rename from redirect/index.js rename to node/redirect/index.js diff --git a/src/api/api-token.js b/node/src/api/api-token.js similarity index 100% rename from src/api/api-token.js rename to node/src/api/api-token.js diff --git a/src/api/opensearch.js b/node/src/api/opensearch.js similarity index 100% rename from src/api/opensearch.js rename to node/src/api/opensearch.js diff --git a/src/api/pagination.js b/node/src/api/pagination.js similarity index 100% rename from src/api/pagination.js rename to node/src/api/pagination.js diff --git a/src/api/request/models.js b/node/src/api/request/models.js similarity index 100% rename from src/api/request/models.js rename to node/src/api/request/models.js diff --git a/src/api/request/pipeline.js b/node/src/api/request/pipeline.js similarity index 100% rename from src/api/request/pipeline.js rename to node/src/api/request/pipeline.js diff --git a/src/api/response/error.js b/node/src/api/response/error.js similarity index 100% rename from src/api/response/error.js rename to node/src/api/response/error.js diff --git a/src/api/response/iiif/collection.js b/node/src/api/response/iiif/collection.js similarity index 100% rename from src/api/response/iiif/collection.js rename to node/src/api/response/iiif/collection.js diff --git a/src/api/response/iiif/manifest.js b/node/src/api/response/iiif/manifest.js similarity index 100% rename from src/api/response/iiif/manifest.js rename to node/src/api/response/iiif/manifest.js diff --git a/src/api/response/iiif/presentation-api/items.js b/node/src/api/response/iiif/presentation-api/items.js similarity index 100% rename from src/api/response/iiif/presentation-api/items.js rename to node/src/api/response/iiif/presentation-api/items.js diff --git a/src/api/response/iiif/presentation-api/metadata.js b/node/src/api/response/iiif/presentation-api/metadata.js similarity index 100% rename from src/api/response/iiif/presentation-api/metadata.js rename to node/src/api/response/iiif/presentation-api/metadata.js diff --git a/src/api/response/iiif/presentation-api/placeholder-canvas.js b/node/src/api/response/iiif/presentation-api/placeholder-canvas.js similarity index 100% rename from src/api/response/iiif/presentation-api/placeholder-canvas.js rename to node/src/api/response/iiif/presentation-api/placeholder-canvas.js diff --git a/src/api/response/opensearch/index.js b/node/src/api/response/opensearch/index.js similarity index 100% rename from src/api/response/opensearch/index.js rename to node/src/api/response/opensearch/index.js diff --git a/src/api/response/transformer.js b/node/src/api/response/transformer.js similarity index 100% rename from src/api/response/transformer.js rename to node/src/api/response/transformer.js diff --git a/src/aws/fetch.js b/node/src/aws/fetch.js similarity index 100% rename from src/aws/fetch.js rename to node/src/aws/fetch.js diff --git a/src/environment.js b/node/src/environment.js similarity index 100% rename from src/environment.js rename to node/src/environment.js diff --git a/src/handlers/authorize-document.js b/node/src/handlers/authorize-document.js similarity index 100% rename from src/handlers/authorize-document.js rename to node/src/handlers/authorize-document.js diff --git a/src/handlers/default-request.js b/node/src/handlers/default-request.js similarity index 100% rename from src/handlers/default-request.js rename to node/src/handlers/default-request.js diff --git a/src/handlers/get-auth-callback.js b/node/src/handlers/get-auth-callback.js similarity index 100% rename from src/handlers/get-auth-callback.js rename to node/src/handlers/get-auth-callback.js diff --git a/src/handlers/get-auth-login.js b/node/src/handlers/get-auth-login.js similarity index 100% rename from src/handlers/get-auth-login.js rename to node/src/handlers/get-auth-login.js diff --git a/src/handlers/get-auth-logout.js b/node/src/handlers/get-auth-logout.js similarity index 100% rename from src/handlers/get-auth-logout.js rename to node/src/handlers/get-auth-logout.js diff --git a/src/handlers/get-auth-whoami.js b/node/src/handlers/get-auth-whoami.js similarity index 100% rename from src/handlers/get-auth-whoami.js rename to node/src/handlers/get-auth-whoami.js diff --git a/src/handlers/get-collection-by-id.js b/node/src/handlers/get-collection-by-id.js similarity index 100% rename from src/handlers/get-collection-by-id.js rename to node/src/handlers/get-collection-by-id.js diff --git a/src/handlers/get-collections.js b/node/src/handlers/get-collections.js similarity index 100% rename from src/handlers/get-collections.js rename to node/src/handlers/get-collections.js diff --git a/src/handlers/get-file-set-auth.js b/node/src/handlers/get-file-set-auth.js similarity index 100% rename from src/handlers/get-file-set-auth.js rename to node/src/handlers/get-file-set-auth.js diff --git a/src/handlers/get-file-set-by-id.js b/node/src/handlers/get-file-set-by-id.js similarity index 100% rename from src/handlers/get-file-set-by-id.js rename to node/src/handlers/get-file-set-by-id.js diff --git a/src/handlers/get-file-set-download.js b/node/src/handlers/get-file-set-download.js similarity index 100% rename from src/handlers/get-file-set-download.js rename to node/src/handlers/get-file-set-download.js diff --git a/src/handlers/get-shared-link-by-id.js b/node/src/handlers/get-shared-link-by-id.js similarity index 100% rename from src/handlers/get-shared-link-by-id.js rename to node/src/handlers/get-shared-link-by-id.js diff --git a/src/handlers/get-similar.js b/node/src/handlers/get-similar.js similarity index 100% rename from src/handlers/get-similar.js rename to node/src/handlers/get-similar.js diff --git a/src/handlers/get-thumbnail.js b/node/src/handlers/get-thumbnail.js similarity index 100% rename from src/handlers/get-thumbnail.js rename to node/src/handlers/get-thumbnail.js diff --git a/src/handlers/get-work-auth.js b/node/src/handlers/get-work-auth.js similarity index 100% rename from src/handlers/get-work-auth.js rename to node/src/handlers/get-work-auth.js diff --git a/src/handlers/get-work-by-id.js b/node/src/handlers/get-work-by-id.js similarity index 100% rename from src/handlers/get-work-by-id.js rename to node/src/handlers/get-work-by-id.js diff --git a/src/handlers/middleware.js b/node/src/handlers/middleware.js similarity index 100% rename from src/handlers/middleware.js rename to node/src/handlers/middleware.js diff --git a/src/handlers/oai.js b/node/src/handlers/oai.js similarity index 100% rename from src/handlers/oai.js rename to node/src/handlers/oai.js diff --git a/src/handlers/oai/search.js b/node/src/handlers/oai/search.js similarity index 100% rename from src/handlers/oai/search.js rename to node/src/handlers/oai/search.js diff --git a/src/handlers/oai/verbs.js b/node/src/handlers/oai/verbs.js similarity index 100% rename from src/handlers/oai/verbs.js rename to node/src/handlers/oai/verbs.js diff --git a/src/handlers/oai/xml-transformer.js b/node/src/handlers/oai/xml-transformer.js similarity index 100% rename from src/handlers/oai/xml-transformer.js rename to node/src/handlers/oai/xml-transformer.js diff --git a/src/handlers/options-request.js b/node/src/handlers/options-request.js similarity index 100% rename from src/handlers/options-request.js rename to node/src/handlers/options-request.js diff --git a/src/handlers/search-runner.js b/node/src/handlers/search-runner.js similarity index 100% rename from src/handlers/search-runner.js rename to node/src/handlers/search-runner.js diff --git a/src/handlers/search.js b/node/src/handlers/search.js similarity index 100% rename from src/handlers/search.js rename to node/src/handlers/search.js diff --git a/src/handlers/transcode-templates.js b/node/src/handlers/transcode-templates.js similarity index 100% rename from src/handlers/transcode-templates.js rename to node/src/handlers/transcode-templates.js diff --git a/src/helpers.js b/node/src/helpers.js similarity index 100% rename from src/helpers.js rename to node/src/helpers.js diff --git a/src/honeybadger-setup.js b/node/src/honeybadger-setup.js similarity index 100% rename from src/honeybadger-setup.js rename to node/src/honeybadger-setup.js diff --git a/src/package-lock.json b/node/src/package-lock.json similarity index 100% rename from src/package-lock.json rename to node/src/package-lock.json diff --git a/src/package.json b/node/src/package.json similarity index 100% rename from src/package.json rename to node/src/package.json diff --git a/test/.eslintrc b/node/test/.eslintrc similarity index 100% rename from test/.eslintrc rename to node/test/.eslintrc diff --git a/test/fixtures/mocks/collection-1234-no-thumbnail.json b/node/test/fixtures/mocks/collection-1234-no-thumbnail.json similarity index 100% rename from test/fixtures/mocks/collection-1234-no-thumbnail.json rename to node/test/fixtures/mocks/collection-1234-no-thumbnail.json diff --git a/test/fixtures/mocks/collection-1234-private-published.json b/node/test/fixtures/mocks/collection-1234-private-published.json similarity index 100% rename from test/fixtures/mocks/collection-1234-private-published.json rename to node/test/fixtures/mocks/collection-1234-private-published.json diff --git a/test/fixtures/mocks/collection-1234.json b/node/test/fixtures/mocks/collection-1234.json similarity index 100% rename from test/fixtures/mocks/collection-1234.json rename to node/test/fixtures/mocks/collection-1234.json diff --git a/test/fixtures/mocks/collections.json b/node/test/fixtures/mocks/collections.json similarity index 100% rename from test/fixtures/mocks/collections.json rename to node/test/fixtures/mocks/collections.json diff --git a/test/fixtures/mocks/expired-shared-link-9101112.json b/node/test/fixtures/mocks/expired-shared-link-9101112.json similarity index 100% rename from test/fixtures/mocks/expired-shared-link-9101112.json rename to node/test/fixtures/mocks/expired-shared-link-9101112.json diff --git a/test/fixtures/mocks/fileset-1234.json b/node/test/fixtures/mocks/fileset-1234.json similarity index 100% rename from test/fixtures/mocks/fileset-1234.json rename to node/test/fixtures/mocks/fileset-1234.json diff --git a/test/fixtures/mocks/fileset-baddata-1234.json b/node/test/fixtures/mocks/fileset-baddata-1234.json similarity index 100% rename from test/fixtures/mocks/fileset-baddata-1234.json rename to node/test/fixtures/mocks/fileset-baddata-1234.json diff --git a/test/fixtures/mocks/fileset-netid-1234.json b/node/test/fixtures/mocks/fileset-netid-1234.json similarity index 100% rename from test/fixtures/mocks/fileset-netid-1234.json rename to node/test/fixtures/mocks/fileset-netid-1234.json diff --git a/test/fixtures/mocks/fileset-restricted-1234.json b/node/test/fixtures/mocks/fileset-restricted-1234.json similarity index 100% rename from test/fixtures/mocks/fileset-restricted-1234.json rename to node/test/fixtures/mocks/fileset-restricted-1234.json diff --git a/test/fixtures/mocks/fileset-restricted-unpublished-1234.json b/node/test/fixtures/mocks/fileset-restricted-unpublished-1234.json similarity index 100% rename from test/fixtures/mocks/fileset-restricted-unpublished-1234.json rename to node/test/fixtures/mocks/fileset-restricted-unpublished-1234.json diff --git a/test/fixtures/mocks/fileset-unpublished-1234.json b/node/test/fixtures/mocks/fileset-unpublished-1234.json similarity index 100% rename from test/fixtures/mocks/fileset-unpublished-1234.json rename to node/test/fixtures/mocks/fileset-unpublished-1234.json diff --git a/test/fixtures/mocks/missing-collection-1234.json b/node/test/fixtures/mocks/missing-collection-1234.json similarity index 100% rename from test/fixtures/mocks/missing-collection-1234.json rename to node/test/fixtures/mocks/missing-collection-1234.json diff --git a/test/fixtures/mocks/missing-fileset-1234.json b/node/test/fixtures/mocks/missing-fileset-1234.json similarity index 100% rename from test/fixtures/mocks/missing-fileset-1234.json rename to node/test/fixtures/mocks/missing-fileset-1234.json diff --git a/test/fixtures/mocks/missing-index.json b/node/test/fixtures/mocks/missing-index.json similarity index 100% rename from test/fixtures/mocks/missing-index.json rename to node/test/fixtures/mocks/missing-index.json diff --git a/test/fixtures/mocks/missing-shared-link-5678.json b/node/test/fixtures/mocks/missing-shared-link-5678.json similarity index 100% rename from test/fixtures/mocks/missing-shared-link-5678.json rename to node/test/fixtures/mocks/missing-shared-link-5678.json diff --git a/test/fixtures/mocks/missing-work-1234.json b/node/test/fixtures/mocks/missing-work-1234.json similarity index 100% rename from test/fixtures/mocks/missing-work-1234.json rename to node/test/fixtures/mocks/missing-work-1234.json diff --git a/test/fixtures/mocks/oai-list-identifiers-sets.json b/node/test/fixtures/mocks/oai-list-identifiers-sets.json similarity index 100% rename from test/fixtures/mocks/oai-list-identifiers-sets.json rename to node/test/fixtures/mocks/oai-list-identifiers-sets.json diff --git a/test/fixtures/mocks/oai-sets.json b/node/test/fixtures/mocks/oai-sets.json similarity index 100% rename from test/fixtures/mocks/oai-sets.json rename to node/test/fixtures/mocks/oai-sets.json diff --git a/test/fixtures/mocks/private-unpublished-work-1234.json b/node/test/fixtures/mocks/private-unpublished-work-1234.json similarity index 100% rename from test/fixtures/mocks/private-unpublished-work-1234.json rename to node/test/fixtures/mocks/private-unpublished-work-1234.json diff --git a/test/fixtures/mocks/private-work-1234.json b/node/test/fixtures/mocks/private-work-1234.json similarity index 100% rename from test/fixtures/mocks/private-work-1234.json rename to node/test/fixtures/mocks/private-work-1234.json diff --git a/test/fixtures/mocks/real-search-event.json b/node/test/fixtures/mocks/real-search-event.json similarity index 100% rename from test/fixtures/mocks/real-search-event.json rename to node/test/fixtures/mocks/real-search-event.json diff --git a/test/fixtures/mocks/scroll-empty.json b/node/test/fixtures/mocks/scroll-empty.json similarity index 100% rename from test/fixtures/mocks/scroll-empty.json rename to node/test/fixtures/mocks/scroll-empty.json diff --git a/test/fixtures/mocks/scroll-missing.json b/node/test/fixtures/mocks/scroll-missing.json similarity index 100% rename from test/fixtures/mocks/scroll-missing.json rename to node/test/fixtures/mocks/scroll-missing.json diff --git a/test/fixtures/mocks/scroll.json b/node/test/fixtures/mocks/scroll.json similarity index 100% rename from test/fixtures/mocks/scroll.json rename to node/test/fixtures/mocks/scroll.json diff --git a/test/fixtures/mocks/search-earliest-record.json b/node/test/fixtures/mocks/search-earliest-record.json similarity index 100% rename from test/fixtures/mocks/search-earliest-record.json rename to node/test/fixtures/mocks/search-earliest-record.json diff --git a/test/fixtures/mocks/search-multiple-targets.json b/node/test/fixtures/mocks/search-multiple-targets.json similarity index 100% rename from test/fixtures/mocks/search-multiple-targets.json rename to node/test/fixtures/mocks/search-multiple-targets.json diff --git a/test/fixtures/mocks/search.json b/node/test/fixtures/mocks/search.json similarity index 100% rename from test/fixtures/mocks/search.json rename to node/test/fixtures/mocks/search.json diff --git a/test/fixtures/mocks/shared-link-1234.json b/node/test/fixtures/mocks/shared-link-1234.json similarity index 100% rename from test/fixtures/mocks/shared-link-1234.json rename to node/test/fixtures/mocks/shared-link-1234.json diff --git a/test/fixtures/mocks/similar.json b/node/test/fixtures/mocks/similar.json similarity index 100% rename from test/fixtures/mocks/similar.json rename to node/test/fixtures/mocks/similar.json diff --git a/test/fixtures/mocks/thumbnail_full.jpg b/node/test/fixtures/mocks/thumbnail_full.jpg similarity index 100% rename from test/fixtures/mocks/thumbnail_full.jpg rename to node/test/fixtures/mocks/thumbnail_full.jpg diff --git a/test/fixtures/mocks/thumbnail_square.jpg b/node/test/fixtures/mocks/thumbnail_square.jpg similarity index 100% rename from test/fixtures/mocks/thumbnail_square.jpg rename to node/test/fixtures/mocks/thumbnail_square.jpg diff --git a/test/fixtures/mocks/unpublished-work-1234.json b/node/test/fixtures/mocks/unpublished-work-1234.json similarity index 100% rename from test/fixtures/mocks/unpublished-work-1234.json rename to node/test/fixtures/mocks/unpublished-work-1234.json diff --git a/test/fixtures/mocks/work-1234-no-collection.json b/node/test/fixtures/mocks/work-1234-no-collection.json similarity index 100% rename from test/fixtures/mocks/work-1234-no-collection.json rename to node/test/fixtures/mocks/work-1234-no-collection.json diff --git a/test/fixtures/mocks/work-1234-no-fileset-width-height.json b/node/test/fixtures/mocks/work-1234-no-fileset-width-height.json similarity index 100% rename from test/fixtures/mocks/work-1234-no-fileset-width-height.json rename to node/test/fixtures/mocks/work-1234-no-fileset-width-height.json diff --git a/test/fixtures/mocks/work-1234-no-thumbnail.json b/node/test/fixtures/mocks/work-1234-no-thumbnail.json similarity index 100% rename from test/fixtures/mocks/work-1234-no-thumbnail.json rename to node/test/fixtures/mocks/work-1234-no-thumbnail.json diff --git a/test/fixtures/mocks/work-1234.json b/node/test/fixtures/mocks/work-1234.json similarity index 100% rename from test/fixtures/mocks/work-1234.json rename to node/test/fixtures/mocks/work-1234.json diff --git a/test/fixtures/mocks/work-netid-1234.json b/node/test/fixtures/mocks/work-netid-1234.json similarity index 100% rename from test/fixtures/mocks/work-netid-1234.json rename to node/test/fixtures/mocks/work-netid-1234.json diff --git a/test/fixtures/mocks/work-restricted-1234.json b/node/test/fixtures/mocks/work-restricted-1234.json similarity index 100% rename from test/fixtures/mocks/work-restricted-1234.json rename to node/test/fixtures/mocks/work-restricted-1234.json diff --git a/test/fixtures/mocks/work-restricted-unpublished-1234.json b/node/test/fixtures/mocks/work-restricted-unpublished-1234.json similarity index 100% rename from test/fixtures/mocks/work-restricted-unpublished-1234.json rename to node/test/fixtures/mocks/work-restricted-unpublished-1234.json diff --git a/test/fixtures/mocks/work-video-5678.json b/node/test/fixtures/mocks/work-video-5678.json similarity index 100% rename from test/fixtures/mocks/work-video-5678.json rename to node/test/fixtures/mocks/work-video-5678.json diff --git a/test/integration/default-handler.test.js b/node/test/integration/default-handler.test.js similarity index 100% rename from test/integration/default-handler.test.js rename to node/test/integration/default-handler.test.js diff --git a/test/integration/get-auth-callback.test.js b/node/test/integration/get-auth-callback.test.js similarity index 100% rename from test/integration/get-auth-callback.test.js rename to node/test/integration/get-auth-callback.test.js diff --git a/test/integration/get-auth-login.test.js b/node/test/integration/get-auth-login.test.js similarity index 100% rename from test/integration/get-auth-login.test.js rename to node/test/integration/get-auth-login.test.js diff --git a/test/integration/get-auth-logout.test.js b/node/test/integration/get-auth-logout.test.js similarity index 100% rename from test/integration/get-auth-logout.test.js rename to node/test/integration/get-auth-logout.test.js diff --git a/test/integration/get-auth-whoami.test.js b/node/test/integration/get-auth-whoami.test.js similarity index 100% rename from test/integration/get-auth-whoami.test.js rename to node/test/integration/get-auth-whoami.test.js diff --git a/test/integration/get-collection-by-id.test.js b/node/test/integration/get-collection-by-id.test.js similarity index 100% rename from test/integration/get-collection-by-id.test.js rename to node/test/integration/get-collection-by-id.test.js diff --git a/test/integration/get-collections.test.js b/node/test/integration/get-collections.test.js similarity index 100% rename from test/integration/get-collections.test.js rename to node/test/integration/get-collections.test.js diff --git a/test/integration/get-doc.test.js b/node/test/integration/get-doc.test.js similarity index 100% rename from test/integration/get-doc.test.js rename to node/test/integration/get-doc.test.js diff --git a/test/integration/get-file-set-auth.test.js b/node/test/integration/get-file-set-auth.test.js similarity index 100% rename from test/integration/get-file-set-auth.test.js rename to node/test/integration/get-file-set-auth.test.js diff --git a/test/integration/get-file-set-download.test.js b/node/test/integration/get-file-set-download.test.js similarity index 100% rename from test/integration/get-file-set-download.test.js rename to node/test/integration/get-file-set-download.test.js diff --git a/test/integration/get-shared-link-by-id.test.js b/node/test/integration/get-shared-link-by-id.test.js similarity index 100% rename from test/integration/get-shared-link-by-id.test.js rename to node/test/integration/get-shared-link-by-id.test.js diff --git a/test/integration/get-similar.test.js b/node/test/integration/get-similar.test.js similarity index 100% rename from test/integration/get-similar.test.js rename to node/test/integration/get-similar.test.js diff --git a/test/integration/get-thumbnail.test.js b/node/test/integration/get-thumbnail.test.js similarity index 100% rename from test/integration/get-thumbnail.test.js rename to node/test/integration/get-thumbnail.test.js diff --git a/test/integration/get-work-auth.test.js b/node/test/integration/get-work-auth.test.js similarity index 100% rename from test/integration/get-work-auth.test.js rename to node/test/integration/get-work-auth.test.js diff --git a/test/integration/get-work-by-id.test.js b/node/test/integration/get-work-by-id.test.js similarity index 100% rename from test/integration/get-work-by-id.test.js rename to node/test/integration/get-work-by-id.test.js diff --git a/test/integration/middleware.test.js b/node/test/integration/middleware.test.js similarity index 100% rename from test/integration/middleware.test.js rename to node/test/integration/middleware.test.js diff --git a/test/integration/oai.test.js b/node/test/integration/oai.test.js similarity index 100% rename from test/integration/oai.test.js rename to node/test/integration/oai.test.js diff --git a/test/integration/options-request.test.js b/node/test/integration/options-request.test.js similarity index 100% rename from test/integration/options-request.test.js rename to node/test/integration/options-request.test.js diff --git a/test/integration/search.test.js b/node/test/integration/search.test.js similarity index 100% rename from test/integration/search.test.js rename to node/test/integration/search.test.js diff --git a/test/test-helpers/event-builder.js b/node/test/test-helpers/event-builder.js similarity index 100% rename from test/test-helpers/event-builder.js rename to node/test/test-helpers/event-builder.js diff --git a/test/test-helpers/index.js b/node/test/test-helpers/index.js similarity index 100% rename from test/test-helpers/index.js rename to node/test/test-helpers/index.js diff --git a/test/unit/api/api-token.test.js b/node/test/unit/api/api-token.test.js similarity index 100% rename from test/unit/api/api-token.test.js rename to node/test/unit/api/api-token.test.js diff --git a/test/unit/api/helpers.test.js b/node/test/unit/api/helpers.test.js similarity index 100% rename from test/unit/api/helpers.test.js rename to node/test/unit/api/helpers.test.js diff --git a/test/unit/api/opensearch.test.js b/node/test/unit/api/opensearch.test.js similarity index 100% rename from test/unit/api/opensearch.test.js rename to node/test/unit/api/opensearch.test.js diff --git a/test/unit/api/pagination.test.js b/node/test/unit/api/pagination.test.js similarity index 100% rename from test/unit/api/pagination.test.js rename to node/test/unit/api/pagination.test.js diff --git a/test/unit/api/request/models.test.js b/node/test/unit/api/request/models.test.js similarity index 100% rename from test/unit/api/request/models.test.js rename to node/test/unit/api/request/models.test.js diff --git a/test/unit/api/request/pipeline.test.js b/node/test/unit/api/request/pipeline.test.js similarity index 100% rename from test/unit/api/request/pipeline.test.js rename to node/test/unit/api/request/pipeline.test.js diff --git a/test/unit/api/response/error.test.js b/node/test/unit/api/response/error.test.js similarity index 100% rename from test/unit/api/response/error.test.js rename to node/test/unit/api/response/error.test.js diff --git a/test/unit/api/response/iiif/collection.test.js b/node/test/unit/api/response/iiif/collection.test.js similarity index 100% rename from test/unit/api/response/iiif/collection.test.js rename to node/test/unit/api/response/iiif/collection.test.js diff --git a/test/unit/api/response/iiif/manifest.test.js b/node/test/unit/api/response/iiif/manifest.test.js similarity index 100% rename from test/unit/api/response/iiif/manifest.test.js rename to node/test/unit/api/response/iiif/manifest.test.js diff --git a/test/unit/api/response/iiif/presentation-api/items.test.js b/node/test/unit/api/response/iiif/presentation-api/items.test.js similarity index 100% rename from test/unit/api/response/iiif/presentation-api/items.test.js rename to node/test/unit/api/response/iiif/presentation-api/items.test.js diff --git a/test/unit/api/response/iiif/presentation-api/metadata.test.js b/node/test/unit/api/response/iiif/presentation-api/metadata.test.js similarity index 100% rename from test/unit/api/response/iiif/presentation-api/metadata.test.js rename to node/test/unit/api/response/iiif/presentation-api/metadata.test.js diff --git a/test/unit/api/response/iiif/presentation-api/placeholder-canvas.test.js b/node/test/unit/api/response/iiif/presentation-api/placeholder-canvas.test.js similarity index 100% rename from test/unit/api/response/iiif/presentation-api/placeholder-canvas.test.js rename to node/test/unit/api/response/iiif/presentation-api/placeholder-canvas.test.js diff --git a/test/unit/api/response/opensearch.test.js b/node/test/unit/api/response/opensearch.test.js similarity index 100% rename from test/unit/api/response/opensearch.test.js rename to node/test/unit/api/response/opensearch.test.js diff --git a/test/unit/aws/environment.test.js b/node/test/unit/aws/environment.test.js similarity index 100% rename from test/unit/aws/environment.test.js rename to node/test/unit/aws/environment.test.js diff --git a/test/unit/package.test.js b/node/test/unit/package.test.js similarity index 100% rename from test/unit/package.test.js rename to node/test/unit/package.test.js diff --git a/test/unit/redirect.test.js b/node/test/unit/redirect.test.js similarity index 100% rename from test/unit/redirect.test.js rename to node/test/unit/redirect.test.js diff --git a/python/requirements.txt b/python/requirements.txt new file mode 100644 index 00000000..dbc3251b --- /dev/null +++ b/python/requirements.txt @@ -0,0 +1,2 @@ +-r src/requirements.txt +ruff diff --git a/python/src/handlers/hello.py b/python/src/handlers/hello.py new file mode 100644 index 00000000..d4c79981 --- /dev/null +++ b/python/src/handlers/hello.py @@ -0,0 +1,11 @@ +import os + +def lambda_handler(event, context): + name = event.get("queryStringParameters", {}).get("name", os.getenv("DEFAULT_NAME", "No One")) + return { + "statusCode": 200, + "headers": { + "Content-Type": "text/plain" + }, + "body": f"Hello, {name}" + } \ No newline at end of file diff --git a/python/src/handlers/hello.test.py b/python/src/handlers/hello.test.py new file mode 100644 index 00000000..f45b0277 --- /dev/null +++ b/python/src/handlers/hello.test.py @@ -0,0 +1,14 @@ +import unittest + +function = __import__('hello') +handler = function.lambda_handler + +class TestFunction(unittest.TestCase): + def test_function(self): + event = {'queryStringParameters': {'name': 'Joe'}} + context = {'requestid' : '1234'} + result = handler(event, context) + self.assertEqual(str(result), "{'statusCode': 200, 'headers': {'Content-Type': 'text/plain'}, 'body': 'Hello, Joe'}") + +if __name__ == '__main__': + unittest.main() \ No newline at end of file diff --git a/python/src/requirements.txt b/python/src/requirements.txt new file mode 100644 index 00000000..f85bf804 --- /dev/null +++ b/python/src/requirements.txt @@ -0,0 +1,6 @@ +langchain~=0.0.208 +nbformat~=5.9.0 +openai~=0.27.8 +pandas~=2.0.2 +python-dotenv~=1.0.0 +weaviate-client~=3.19.2 diff --git a/template.yaml b/template.yaml index eb4726bb..c2fb2bb3 100644 --- a/template.yaml +++ b/template.yaml @@ -7,7 +7,7 @@ Description: > # More info about Globals: https://github.com/awslabs/serverless-application-model/blob/master/docs/globals.rst Globals: Function: - CodeUri: ./src + CodeUri: ./node/src Runtime: nodejs16.x Architectures: - x86_64 @@ -631,6 +631,22 @@ Resources: ApiId: !Ref dcApi Path: /oai Method: POST + helloWorldFunction: + Type: AWS::Serverless::Function + Properties: + CodeUri: ./python/src + Runtime: python3.9 + Handler: handlers/hello.lambda_handler + Environment: + Variables: + DEFAULT_NAME: "World" + Events: + GetApiGet: + Type: HttpApi + Properties: + ApiId: !Ref dcApi + Path: /hello + Method: GET defaultFunction: Type: AWS::Serverless::Function Properties: @@ -955,71 +971,71 @@ Resources: Type: AWS::Serverless::HttpApi Properties: StageName: latest - rootRedirect: - Type: AWS::Serverless::Function - Properties: - CodeUri: ./redirect - Handler: index.handler - Timeout: 1 - Description: Redirects to latest version of docs - Environment: - Variables: - REDIRECT_TO: /docs/v2/index.html - Events: - RedirectApiGet: - Type: HttpApi - Properties: - ApiId: !Ref rootApi - Path: / - Method: GET - RedirectApiHead: - Type: HttpApi - Properties: - ApiId: !Ref rootApi - Path: / - Method: HEAD + # rootRedirect: + # Type: AWS::Serverless::Function + # Properties: + # CodeUri: ./redirect + # Handler: index.handler + # Timeout: 1 + # Description: Redirects to latest version of docs + # Environment: + # Variables: + # REDIRECT_TO: /docs/v2/index.html + # Events: + # RedirectApiGet: + # Type: HttpApi + # Properties: + # ApiId: !Ref rootApi + # Path: / + # Method: GET + # RedirectApiHead: + # Type: HttpApi + # Properties: + # ApiId: !Ref rootApi + # Path: / + # Method: HEAD # Documentation - docsMapping: - Type: AWS::ApiGatewayV2::ApiMapping - Properties: - DomainName: !Sub "${CustomDomainHost}.${CustomDomainZone}" - ApiId: !Ref rootApi - Stage: !Ref rootApilatestStage - DependsOn: dcApi - docsBucket: - Type: AWS::S3::Bucket - Properties: - BucketName: !Sub "${CustomDomainHost}-docs.${CustomDomainZone}" - AccessControl: PublicRead - WebsiteConfiguration: - IndexDocument: index.html - ErrorDocument: index.html - docsBucketPolicy: - Type: AWS::S3::BucketPolicy - Properties: - PolicyDocument: - Id: MyPolicy - Version: 2012-10-17 - Statement: - - Sid: PublicReadForGetBucketObjects - Effect: Allow - Principal: "*" - Action: "s3:GetObject" - Resource: !Sub "arn:aws:s3:::${docsBucket}/*" - Bucket: !Ref docsBucket - docsIntegration: - Type: AWS::ApiGatewayV2::Integration - Properties: - ApiId: !Ref rootApi - IntegrationMethod: GET - IntegrationType: HTTP_PROXY - IntegrationUri: !Sub "http://${docsBucket}.s3-website-us-east-1.amazonaws.com/{proxy}" - PayloadFormatVersion: "1.0" - docsRoute: - Type: AWS::ApiGatewayV2::Route - Properties: - ApiId: !Ref rootApi - AuthorizationType: NONE - RouteKey: GET /docs/v2/{proxy+} - Target: !Sub "integrations/${docsIntegration}" + # docsMapping: + # Type: AWS::ApiGatewayV2::ApiMapping + # Properties: + # DomainName: !Sub "${CustomDomainHost}.${CustomDomainZone}" + # ApiId: !Ref rootApi + # Stage: !Ref rootApilatestStage + # DependsOn: dcApi + # docsBucket: + # Type: AWS::S3::Bucket + # Properties: + # BucketName: !Sub "${CustomDomainHost}-docs.${CustomDomainZone}" + # AccessControl: PublicRead + # WebsiteConfiguration: + # IndexDocument: index.html + # ErrorDocument: index.html + # docsBucketPolicy: + # Type: AWS::S3::BucketPolicy + # Properties: + # PolicyDocument: + # Id: MyPolicy + # Version: 2012-10-17 + # Statement: + # - Sid: PublicReadForGetBucketObjects + # Effect: Allow + # Principal: "*" + # Action: "s3:GetObject" + # Resource: !Sub "arn:aws:s3:::${docsBucket}/*" + # Bucket: !Ref docsBucket + # docsIntegration: + # Type: AWS::ApiGatewayV2::Integration + # Properties: + # ApiId: !Ref rootApi + # IntegrationMethod: GET + # IntegrationType: HTTP_PROXY + # IntegrationUri: !Sub "http://${docsBucket}.s3-website-us-east-1.amazonaws.com/{proxy}" + # PayloadFormatVersion: "1.0" + # docsRoute: + # Type: AWS::ApiGatewayV2::Route + # Properties: + # ApiId: !Ref rootApi + # AuthorizationType: NONE + # RouteKey: GET /docs/v2/{proxy+} + # Target: !Sub "integrations/${docsIntegration}" From 31a3ae0a1f387236d7eab75ef9009858a435eb7d Mon Sep 17 00:00:00 2001 From: "Michael B. Klein" Date: Thu, 22 Jun 2023 00:14:38 +0000 Subject: [PATCH 2/6] Build, deploy, and test coverage --- .github/workflows/deploy.yml | 2 ++ .github/workflows/test-python.yml | 12 ++++++------ .gitignore | 1 + .husky/pre-commit | 4 ++-- python/requirements.txt | 2 ++ python/src/__init__.py | 0 python/src/handlers/hello.py | 3 ++- python/src/handlers/hello.test.py | 14 -------------- python/test/__init__.py | 0 python/test/handlers/__init__.py | 0 python/test/handlers/test_hello.py | 9 +++++++++ 11 files changed, 24 insertions(+), 23 deletions(-) create mode 100644 python/src/__init__.py delete mode 100644 python/src/handlers/hello.test.py create mode 100644 python/test/__init__.py create mode 100644 python/test/handlers/__init__.py create mode 100644 python/test/handlers/test_hello.py diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index c9e262c9..3e0792d5 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -42,6 +42,8 @@ jobs: ref: main path: ".tfvars" - uses: actions/setup-python@v2 + with: + python-version: '3.9' - uses: aws-actions/setup-sam@v1 - name: sam fix https://github.com/aws/aws-sam-cli/issues/4527 run: $(dirname $(readlink $(which sam)))/pip install --force-reinstall "cryptography==38.0.4" diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index bfc7a8ac..69a0d832 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -18,12 +18,12 @@ jobs: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: - python-version: '3.10' + python-version: '3.9' cache-dependency-path: python/requirements.txt - - run: npm ci + - run: pip install -r requirements.txt - name: Check code style - run: npm run lint && npm run prettier + run: ruff check . - name: Run tests - run: npm run test:coverage - - name: Validate OpenAPI spec - run: npm run validate-spec + run: | + coverage run -m unittest + coverage report diff --git a/.gitignore b/.gitignore index 50cd1c55..cfbf6386 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ yarn-error.log* lerna-debug.log* ### Python ### +.coverage __pycache__/ *.py[cod] *$py.class diff --git a/.husky/pre-commit b/.husky/pre-commit index 66741865..0fc6b0d5 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1,4 +1,4 @@ #!/usr/bin/env sh . "$(dirname -- "$0")/_/husky.sh" -cd node -npm run lint && npm run prettier +cd node && npm run lint && npm run prettier && cd - +cd python && ruff check . && cd - diff --git a/python/requirements.txt b/python/requirements.txt index dbc3251b..c7a7c987 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,2 +1,4 @@ -r src/requirements.txt +coverage ruff +wheel \ No newline at end of file diff --git a/python/src/__init__.py b/python/src/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/src/handlers/hello.py b/python/src/handlers/hello.py index d4c79981..5d4e5bd3 100644 --- a/python/src/handlers/hello.py +++ b/python/src/handlers/hello.py @@ -1,7 +1,8 @@ import os def lambda_handler(event, context): - name = event.get("queryStringParameters", {}).get("name", os.getenv("DEFAULT_NAME", "No One")) + params = event.get("queryStringParameters", {}) + name = params.get("name", os.getenv("DEFAULT_NAME", "No One")) return { "statusCode": 200, "headers": { diff --git a/python/src/handlers/hello.test.py b/python/src/handlers/hello.test.py deleted file mode 100644 index f45b0277..00000000 --- a/python/src/handlers/hello.test.py +++ /dev/null @@ -1,14 +0,0 @@ -import unittest - -function = __import__('hello') -handler = function.lambda_handler - -class TestFunction(unittest.TestCase): - def test_function(self): - event = {'queryStringParameters': {'name': 'Joe'}} - context = {'requestid' : '1234'} - result = handler(event, context) - self.assertEqual(str(result), "{'statusCode': 200, 'headers': {'Content-Type': 'text/plain'}, 'body': 'Hello, Joe'}") - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/python/test/__init__.py b/python/test/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/test/handlers/__init__.py b/python/test/handlers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/python/test/handlers/test_hello.py b/python/test/handlers/test_hello.py new file mode 100644 index 00000000..b94b1c11 --- /dev/null +++ b/python/test/handlers/test_hello.py @@ -0,0 +1,9 @@ +import unittest +from src.handlers import hello + +class TestFunction(unittest.TestCase): + def test_function(self): + event = {'queryStringParameters': {'name': 'Joe'}} + context = {'requestid' : '1234'} + result = hello.lambda_handler(event, context) + self.assertEqual(result['body'], 'Hello, Joe') From be2b386be67301cd4d645d9c78c6dd06d3834b9f Mon Sep 17 00:00:00 2001 From: Karen Shaw Date: Thu, 22 Jun 2023 15:48:26 +0000 Subject: [PATCH 3/6] Begin chat handler Add initial chat handler Add Makefile to make ongoing development easier Parameterize setup and chat; continue to build out prototype Add cookie and bearer auth to chat endpoint Fix ruff code style errors Fix deploy paths Remove prints from chat lambda Turn a POSTed question into a valid JSON response from the LLM Add certainty in result CORS --- .github/workflows/deploy.yml | 4 +- Makefile | 50 +++++++++++++++++++++ python/requirements.txt | 3 +- python/src/handlers/chat.py | 85 ++++++++++++++++++++++++++++++++++++ python/src/requirements.txt | 2 + python/src/setup.py | 44 +++++++++++++++++++ template.yaml | 40 +++++++++++++++++ 7 files changed, 224 insertions(+), 4 deletions(-) create mode 100644 Makefile create mode 100644 python/src/handlers/chat.py create mode 100644 python/src/setup.py diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 3e0792d5..0e9a5797 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -6,8 +6,8 @@ on: - main paths: - ".github/workflows/deploy.yml" - - "node/*" - - "python/*" + - "node/**" + - "python/**" - "template.yaml" workflow_dispatch: concurrency: diff --git a/Makefile b/Makefile new file mode 100644 index 00000000..60376884 --- /dev/null +++ b/Makefile @@ -0,0 +1,50 @@ +ifndef VERBOSE +.SILENT: +endif +ENV=dev + +help: + echo "make build | build the SAM project" + echo "make serve | run the SAM server locally" + echo "make clean | remove all installed dependencies and build artifacts" + echo "make deps | install all dependencies" + echo "make link | create hard links to allow for hot reloading of a built project" + echo "make secrets | symlink secrets files from ../tfvars" + echo "make test | run all tests" + echo "make cover | run all tests with coverage" + echo "make env ENV=[env] | activate env.\$$ENV.json file (default: dev)" + echo "make deps-node | install node dependencies" + echo "make deps-python | install python dependencies" + echo "make test-node | run node tests" + echo "make test-python | run python tests" + echo "make cover-node | run node tests with coverage" + echo "make cover-python | run python tests with coverage" +.aws-sam/build.toml: ./template.yaml node/package-lock.json node/src/package-lock.json python/requirements.txt python/src/requirements.txt + sam build --cached --parallel +deps-node: + cd node && npm ci +cover-node: + cd node && npm run test:coverage +test-node: + cd node && npm run test +deps-python: + cd python && pip install -r requirements.txt +cover-python: + cd python && coverage run -m unittest && coverage report +test-python: + cd python && python -m unittest +build: .aws-sam/build.toml +link: build + cd python/src && for src in *.py **/*.py; do for target in $$(find ../../.aws-sam/build -maxdepth 1 -type d); do if [[ -f $$target/$$src ]]; then ln -f $$src $$target/$$src; fi; done; done + cd node/src && for src in *.js *.json **/*.js **/*.json; do for target in $$(find ../../.aws-sam/build -maxdepth 1 -type d); do if [[ -f $$target/$$src ]]; then ln -f $$src $$target/$$src; fi; done; done +serve: link + sam local start-api --host 0.0.0.0 --log-file dc-api.log +deps: deps-node deps-python +test: test-node test-python +cover: cover-node cover-python +env: + ln -fs ./env.${ENV}.json ./env.json +secrets: + ln -s ../tfvars/dc-api/* . +clean: + rm -rf .aws-sam node/node_modules node/src/node_modules python/**/__pycache__ python/.coverage python/.ruff_cache diff --git a/python/requirements.txt b/python/requirements.txt index c7a7c987..074747e7 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,4 +1,3 @@ -r src/requirements.txt coverage -ruff -wheel \ No newline at end of file +ruff \ No newline at end of file diff --git a/python/src/handlers/chat.py b/python/src/handlers/chat.py new file mode 100644 index 00000000..8ba0ae69 --- /dev/null +++ b/python/src/handlers/chat.py @@ -0,0 +1,85 @@ +# ruff: noqa: E501 +import base64 +import json +import os +import setup +from langchain.chains import RetrievalQAWithSourcesChain + +def handler(event, context): + if not is_authenticated(event): + return { + "statusCode": 401, + "headers": { + "Content-Type": "text/plain" + }, + "body": "Unauthorized" + } + question = get_query(event) + index_name = get_param(event, "index", "Work") + text_key = get_param(event, "text_key", "title") + attributes = get_param(event, + "attributes", + "identifier,title,source,alternate_title,contributor,create_date,creator,date_created,description,genre,keywords,language,location,physical_description_material,physical_description_size,scope_and_contents,style_period,subject,table_of_contents,technique,work_type").split(",") + + weaviate = setup.weaviate_vector_store(index_name=index_name, + text_key=text_key, + attributes=attributes) + + client = setup.openai_chat_client() + + + chain = RetrievalQAWithSourcesChain.from_chain_type( + client, + chain_type="stuff", + retriever=weaviate.as_retriever(search_kwargs=dict(additional="certainty")), + return_source_documents=True) + + response = chain({"question": question}) + print(response) + response['source_documents'] = [doc.__dict__ for doc in response['source_documents']] + return { + "statusCode": 200, + "headers": { + "Content-Type": "application/json", + "access-control-allow-methods": "POST, GET", + "access-control-allow-credentials": True, + "access-control-max-age": 600, + "access-control-allow-origin": get_header(event, "Origin", "*"), + "access-control-allow-headers": "Accept, Accept-Charset, Accept-Encoding, Accept-Language, Accept-Datetime, Authorization, Cache-Control, Content-Length, Content-Type, Cookie, Date, Expect, Host, If-Match, If-Modified-Since, If-None-Match, If-Range, If-Unmodified-Since, Origin, Pragma, Range, Referer, User-Agent, X-CSRF-Token, X-Forwarded-For, X-Forwarded-Host, X-Forwarded-Port, X-Requested-With" + }, + "body": json.dumps(response) + } + +def get_header(event, header, default=None): + headers = event.get("headers") + return headers.get(header, headers.get(header.lower(), default)) + +def get_param(event, parameter, default): + params = event.get("queryStringParameters", {}) + return params.get(parameter, default) + + +def get_query(event): + question = event.get("body", "") + if event.get("isBase64Encoded", False): + question = base64.b64decode(question) + return question + + +def is_authenticated(event): + token = get_header(event, "Authorization") + + if token is None: + for cookie in event.get("cookies", []): + [k, v] = cookie.split("=", 1) + if k == os.getenv("API_TOKEN_NAME"): + token = v + else: + token = token.replace("Bearer ", "") + + return setup.validate_token(token) + + + +# result = weaviate.similarity_search_by_text(query=question, +# additional="certainty") \ No newline at end of file diff --git a/python/src/requirements.txt b/python/src/requirements.txt index f85bf804..b3b93bd6 100644 --- a/python/src/requirements.txt +++ b/python/src/requirements.txt @@ -2,5 +2,7 @@ langchain~=0.0.208 nbformat~=5.9.0 openai~=0.27.8 pandas~=2.0.2 +pyjwt~=2.6.0 python-dotenv~=1.0.0 weaviate-client~=3.19.2 +wheel~=0.40.0 \ No newline at end of file diff --git a/python/src/setup.py b/python/src/setup.py new file mode 100644 index 00000000..d9cb9ad6 --- /dev/null +++ b/python/src/setup.py @@ -0,0 +1,44 @@ +from langchain.chat_models import AzureChatOpenAI +from langchain.vectorstores import Weaviate +from typing import List +import os +import jwt +import weaviate + +def openai_chat_client(): + deployment = os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_ID") + key = os.getenv("AZURE_OPENAI_API_KEY") + resource = os.getenv("AZURE_OPENAI_RESOURCE_NAME") + + return AzureChatOpenAI(deployment_name=deployment, + openai_api_key=key, + openai_api_base=f"https://{resource}.openai.azure.com/", + openai_api_version="2023-03-15-preview") + + +def weaviate_vector_store(index_name: str, text_key: str, attributes: List[str] = []): + weaviate_url = os.environ['WEAVIATE_URL'] + weaviate_api_key = os.environ['WEAVIATE_API_KEY'] + # openai_api_key = os.environ['AZURE_OPENAI_API_KEY'] + + auth_config = weaviate.AuthApiKey(api_key=weaviate_api_key) + + client = weaviate.Client( + url=weaviate_url, + auth_client_secret=auth_config + ) + return Weaviate(client=client, + index_name=index_name, + text_key=text_key, + attributes=attributes) + + +def validate_token(token): + secret = os.getenv("API_TOKEN_SECRET") + try: + claim = jwt.decode(token, secret, algorithms=["HS256"]) + print(f"CLAIM: {claim}") + return claim.get("isLoggedIn", False) + except Exception as e: + print(e) + return False \ No newline at end of file diff --git a/template.yaml b/template.yaml index c2fb2bb3..8dccdd16 100644 --- a/template.yaml +++ b/template.yaml @@ -32,6 +32,18 @@ Parameters: ApiTokenSecret: Type: String Description: Secret Key for Encrypting JWTs (must match IIIF server) + AzureOpenaiApiKey: + Type: String + Description: Azure OpenAI API Key + AzureOpenaiEmbeddingDeploymentId: + Type: String + Description: Azure OpenAI Embedding Deployment ID + AzureOpenaiLlmDeploymentId: + Type: String + Description: Azure OpenAI LLM Deployment ID + AzureOpenaiResourceName: + Type: String + Description: Azure OpenAI Resource Name CustomDomainCertificateArn: Type: String Description: SSL Certificate for the Custom Domain Name @@ -97,6 +109,12 @@ Parameters: StreamingBucket: Type: String Description: Meadow streaming bucket + WeaviateApiKey: + Type: String + Description: Weaviate API Key + WeaviateUrl: + Type: String + Description: Weaviate URL Resources: apiDependencies: Type: AWS::Serverless::LayerVersion @@ -647,6 +665,28 @@ Resources: ApiId: !Ref dcApi Path: /hello Method: GET + chatFunction: + Type: AWS::Serverless::Function + Properties: + CodeUri: ./python/src + Runtime: python3.9 + Handler: handlers/chat.handler + Timeout: 300 + Environment: + Variables: + AZURE_OPENAI_API_KEY: !Ref AzureOpenaiApiKey + AZURE_OPENAI_EMBEDDING_DEPLOYMENT_ID: !Ref AzureOpenaiEmbeddingDeploymentId + AZURE_OPENAI_LLM_DEPLOYMENT_ID: !Ref AzureOpenaiLlmDeploymentId + AZURE_OPENAI_RESOURCE_NAME: !Ref AzureOpenaiResourceName + WEAVIATE_API_KEY: !Ref WeaviateApiKey + WEAVIATE_URL: !Ref WeaviateUrl + Events: + PostApi: + Type: HttpApi + Properties: + ApiId: !Ref dcApi + Path: /chat + Method: POST defaultFunction: Type: AWS::Serverless::Function Properties: From a93b09542740471bdaab82a84654ce47cb2232d1 Mon Sep 17 00:00:00 2001 From: "Michael B. Klein" Date: Mon, 26 Jun 2023 20:59:22 +0000 Subject: [PATCH 4/6] Answer real questions using one-shot prompting Add a custom document prompt to the chain overriding the default Split event and token handling into helpers Add python tests & fix code style Build document prompt dynamically using requested attributes Update chat code to replace identifier with source Update the prompts for updated schema, try CORS fix Fix ruff style issue Add origin header logging to chat handler, troubleshooting CORS issue Try another approach for event cookie parsing Fix ruff style issue Update prototype to GPT-4 Integrate chat websocket stack Remove non-streaming chat handler Move python dependencies to a layer Restore python tests --- .github/workflows/deploy.yml | 4 +- .github/workflows/test-python.yml | 9 +- .github/workflows/validate-template.yml | 3 + .gitignore | 2 + .husky/pre-commit | 2 +- Makefile | 16 +- .../dependencies}/requirements.txt | 1 + {python => chat}/src/__init__.py | 0 chat/src/handlers/chat.py | 117 +++++++++ chat/src/helpers/apitoken.py | 28 +++ chat/src/helpers/prompts.py | 153 +++++++++++ chat/src/requirements.txt | 14 ++ {python => chat}/src/setup.py | 19 +- chat/template.yaml | 237 ++++++++++++++++++ {python => chat}/test/__init__.py | 0 chat/test/fixtures/apitoken.py | 5 + chat/test/fixtures/events.py | 56 +++++ {python => chat}/test/handlers/__init__.py | 0 chat/test/helpers/__init__.py | 0 chat/test/helpers/test_apitoken.py | 24 ++ node/package.json | 2 +- node/src/handlers/get-chat-endpoint.js | 21 ++ .../integration/get-chat-endpoint.test.js | 31 +++ node/test/integration/oai.test.js | 1 - node/test/test-helpers/index.js | 1 + python/requirements.txt | 3 - python/src/handlers/chat.py | 85 ------- python/src/handlers/hello.py | 12 - python/test/handlers/test_hello.py | 9 - template.yaml | 48 ++-- 30 files changed, 738 insertions(+), 165 deletions(-) rename {python/src => chat/dependencies}/requirements.txt (89%) rename {python => chat}/src/__init__.py (100%) create mode 100644 chat/src/handlers/chat.py create mode 100644 chat/src/helpers/apitoken.py create mode 100644 chat/src/helpers/prompts.py create mode 100644 chat/src/requirements.txt rename {python => chat}/src/setup.py (73%) create mode 100644 chat/template.yaml rename {python => chat}/test/__init__.py (100%) create mode 100644 chat/test/fixtures/apitoken.py create mode 100644 chat/test/fixtures/events.py rename {python => chat}/test/handlers/__init__.py (100%) create mode 100644 chat/test/helpers/__init__.py create mode 100644 chat/test/helpers/test_apitoken.py create mode 100644 node/src/handlers/get-chat-endpoint.js create mode 100644 node/test/integration/get-chat-endpoint.test.js delete mode 100644 python/requirements.txt delete mode 100644 python/src/handlers/chat.py delete mode 100644 python/src/handlers/hello.py delete mode 100644 python/test/handlers/test_hello.py diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 0e9a5797..27a4a28e 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -43,10 +43,8 @@ jobs: path: ".tfvars" - uses: actions/setup-python@v2 with: - python-version: '3.9' + python-version: '3.10' - uses: aws-actions/setup-sam@v1 - - name: sam fix https://github.com/aws/aws-sam-cli/issues/4527 - run: $(dirname $(readlink $(which sam)))/pip install --force-reinstall "cryptography==38.0.4" - uses: aws-actions/configure-aws-credentials@master with: role-to-assume: arn:aws:iam::${{ secrets.AwsAccount }}:role/github-actions-role diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index 69a0d832..ff995d1c 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -3,11 +3,11 @@ on: push: paths: - ".github/workflows/test-python.yml" - - "python/**" + - "chat/**" workflow_dispatch: defaults: run: - working-directory: ./python + working-directory: ./chat jobs: test: runs-on: ubuntu-latest @@ -19,11 +19,12 @@ jobs: - uses: actions/setup-python@v4 with: python-version: '3.9' - cache-dependency-path: python/requirements.txt + cache-dependency-path: chat/src/requirements.txt - run: pip install -r requirements.txt + working-directory: ./chat/src - name: Check code style run: ruff check . - name: Run tests run: | - coverage run -m unittest + coverage run --include='src/**/*' -m unittest coverage report diff --git a/.github/workflows/validate-template.yml b/.github/workflows/validate-template.yml index 7625850a..9eda334e 100644 --- a/.github/workflows/validate-template.yml +++ b/.github/workflows/validate-template.yml @@ -13,6 +13,9 @@ jobs: contents: read environment: test steps: + - uses: actions/setup-python@v4 + with: + python-version: '3.9' - uses: aws-actions/setup-sam@v1 - name: sam fix https://github.com/aws/aws-sam-cli/issues/4527 run: $(dirname $(readlink $(which sam)))/pip install --force-reinstall "cryptography==38.0.4" diff --git a/.gitignore b/.gitignore index cfbf6386..adf3345d 100644 --- a/.gitignore +++ b/.gitignore @@ -222,6 +222,8 @@ $RECYCLE.BIN/ .vscode /samconfig.toml +/samconfig.yaml +/samconfig.*.yaml /env.json /env.*.json /*.parameters diff --git a/.husky/pre-commit b/.husky/pre-commit index 0fc6b0d5..01456dba 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1,4 +1,4 @@ #!/usr/bin/env sh . "$(dirname -- "$0")/_/husky.sh" cd node && npm run lint && npm run prettier && cd - -cd python && ruff check . && cd - +cd chat/src && ruff check . && cd - diff --git a/Makefile b/Makefile index 60376884..41f41972 100644 --- a/Makefile +++ b/Makefile @@ -10,11 +10,14 @@ help: echo "make deps | install all dependencies" echo "make link | create hard links to allow for hot reloading of a built project" echo "make secrets | symlink secrets files from ../tfvars" + echo "make style | run all style checks" echo "make test | run all tests" echo "make cover | run all tests with coverage" echo "make env ENV=[env] | activate env.\$$ENV.json file (default: dev)" echo "make deps-node | install node dependencies" echo "make deps-python | install python dependencies" + echo "make style-node | run node code style check" + echo "make style-python | run python code style check" echo "make test-node | run node tests" echo "make test-python | run python tests" echo "make cover-node | run node tests with coverage" @@ -25,21 +28,26 @@ deps-node: cd node && npm ci cover-node: cd node && npm run test:coverage +style-node: + cd node && npm run prettier test-node: cd node && npm run test deps-python: - cd python && pip install -r requirements.txt + cd chat/src && pip install -r requirements.txt cover-python: - cd python && coverage run -m unittest && coverage report + cd chat/src && coverage run --include='src/**/*' -m unittest -v && coverage report +style-python: + cd chat && ruff check . test-python: - cd python && python -m unittest + cd chat && python -m unittest -v build: .aws-sam/build.toml link: build - cd python/src && for src in *.py **/*.py; do for target in $$(find ../../.aws-sam/build -maxdepth 1 -type d); do if [[ -f $$target/$$src ]]; then ln -f $$src $$target/$$src; fi; done; done + cd chat/src && for src in *.py **/*.py; do for target in $$(find ../../.aws-sam/build -maxdepth 1 -type d); do if [[ -f $$target/$$src ]]; then ln -f $$src $$target/$$src; fi; done; done cd node/src && for src in *.js *.json **/*.js **/*.json; do for target in $$(find ../../.aws-sam/build -maxdepth 1 -type d); do if [[ -f $$target/$$src ]]; then ln -f $$src $$target/$$src; fi; done; done serve: link sam local start-api --host 0.0.0.0 --log-file dc-api.log deps: deps-node deps-python +style: style-node style-python test: test-node test-python cover: cover-node cover-python env: diff --git a/python/src/requirements.txt b/chat/dependencies/requirements.txt similarity index 89% rename from python/src/requirements.txt rename to chat/dependencies/requirements.txt index b3b93bd6..68ec56c3 100644 --- a/python/src/requirements.txt +++ b/chat/dependencies/requirements.txt @@ -4,5 +4,6 @@ openai~=0.27.8 pandas~=2.0.2 pyjwt~=2.6.0 python-dotenv~=1.0.0 +tiktoken~=0.4.0 weaviate-client~=3.19.2 wheel~=0.40.0 \ No newline at end of file diff --git a/python/src/__init__.py b/chat/src/__init__.py similarity index 100% rename from python/src/__init__.py rename to chat/src/__init__.py diff --git a/chat/src/handlers/chat.py b/chat/src/handlers/chat.py new file mode 100644 index 00000000..d38a4bf0 --- /dev/null +++ b/chat/src/handlers/chat.py @@ -0,0 +1,117 @@ +import boto3 +import json +import os +import setup +from helpers.apitoken import ApiToken +from helpers.prompts import document_template, prompt_template +from langchain.callbacks.base import BaseCallbackHandler +from langchain.chains.qa_with_sources import load_qa_with_sources_chain +from langchain.prompts import PromptTemplate +from openai.error import InvalidRequestError + +DEFAULT_INDEX = "Work" +DEFAULT_KEY = "title" +DEFAULT_ATTRIBUTES = ("title,alternate_title,collection,contributor,creator," + "date_created,description,genre,language,library_unit," + "location,physical_description_material,physical_description_size," + "published,rights_statement,scope_and_contents,series,source," + "style_period,subject,table_of_contents,technique,visibility," + "work_type") + +class Websocket: + def __init__(self, endpoint_url, connection_id, ref): + self.client = boto3.client('apigatewaymanagementapi', endpoint_url=endpoint_url) + self.connection_id = connection_id + self.ref = ref + + def send(self, data): + data['ref'] = self.ref + data_as_bytes = bytes(json.dumps(data), 'utf-8') + self.client.post_to_connection(Data=data_as_bytes, ConnectionId=self.connection_id) + +class StreamingSocketCallbackHandler(BaseCallbackHandler): + def __init__(self, socket: Websocket): + self.socket = socket + + def on_llm_new_token(self, token: str, **kwargs): + self.socket.send({'token': token}) + +def handler(event, context): + try: + payload = json.loads(event.get('body', '{}')) + + request_context = event.get('requestContext', {}) + connection_id = request_context.get('connectionId') + endpoint_url = f'https://{request_context.get("domainName")}/{request_context.get("stage")}' + ref = payload.get('ref') + socket = Websocket(connection_id=connection_id, endpoint_url=endpoint_url, ref=ref) + + + api_token = ApiToken(signed_token=payload.get("auth")) + if not api_token.is_logged_in(): + socket.send({ "statusCode": 401, "body": "Unauthorized" }) + return { + "statusCode": 401, + "body": "Unauthorized" + } + + question = payload.get("question") + index_name = payload.get("index", DEFAULT_INDEX) + text_key = payload.get("text_key", DEFAULT_KEY) + attributes = [ + item for item + in set(payload.get("attributes", DEFAULT_ATTRIBUTES).split(",")) + if item not in [text_key, "source"] + ] + + weaviate = setup.weaviate_vector_store(index_name=index_name, + text_key=text_key, + attributes=attributes + ["source"]) + + client = setup.openai_chat_client(callbacks=[StreamingSocketCallbackHandler(socket)], streaming=True) + + prompt = PromptTemplate( + template=prompt_template(), + input_variables=["question", "context"] + ) + + document_prompt = PromptTemplate( + template=document_template(attributes), + input_variables=["page_content", "source"] + attributes, + ) + + docs = weaviate.similarity_search(question, k=10, additional="certainty") + chain = load_qa_with_sources_chain( + client, + chain_type="stuff", + prompt=prompt, + document_prompt=document_prompt, + document_variable_name="context", + verbose=to_bool(os.getenv("VERBOSE")) + ) + + try: + doc_response = [doc.__dict__ for doc in docs] + socket.send({"question": question, "source_documents": doc_response}) + response = chain({"question": question, "input_documents": docs}) + response = { + "answer": response["output_text"], + } + socket.send(response) + except InvalidRequestError as err: + response = { + "question": question, + "answer": str(err), + "source_documents": [] + } + socket.send(response) + + return {'statusCode': 200} + except Exception as err: + print(event) + raise err + +def to_bool(val): + if isinstance(val, str): + return val.lower() not in ["", "no", "false", "0"] + return bool(val) diff --git a/chat/src/helpers/apitoken.py b/chat/src/helpers/apitoken.py new file mode 100644 index 00000000..4c6ecbd4 --- /dev/null +++ b/chat/src/helpers/apitoken.py @@ -0,0 +1,28 @@ +from datetime import datetime +import jwt +import os + +class ApiToken: + @classmethod + def empty_token(cls): + time = int(datetime.now().timestamp()) + return { + 'iss': os.getenv('DC_API_ENDPOINT'), + 'exp': datetime.fromtimestamp(time + 12 * 60 * 60).timestamp(), # 12 hours + 'iat': time, + 'entitlements': [], + 'isLoggedIn': False, + } + + def __init__(self, signed_token=None): + if signed_token is None: + self.token = ApiToken.empty_token() + else: + try: + secret = os.getenv("API_TOKEN_SECRET") + self.token = jwt.decode(signed_token, secret, algorithms=["HS256"]) + except Exception: + self.token = ApiToken.empty_token() + + def is_logged_in(self): + return self.token.get("isLoggedIn", False) diff --git a/chat/src/helpers/prompts.py b/chat/src/helpers/prompts.py new file mode 100644 index 00000000..b79510e8 --- /dev/null +++ b/chat/src/helpers/prompts.py @@ -0,0 +1,153 @@ +# ruff: noqa: E501 +def prompt_template(): + return """Using all of the provided source documents, create a helpful and thorough answer to the supplied question. + If you don't know the answer, just say that you don't know. Don't try to make up an answer, but you should use the documents provided in order to ground your response. + It may be helpful to explain why a provided document does not pertain to the query as well. + Feel free to reference various aspects of the sources in your explanation, but please don't include the full sources in the answer. + The Content field represents the title of each document, and the Metadata fields are the attributes. The Source field is the unique identifier for each document. + 'certainty' is an opinionated measure of the distance between the query vector and the document embedding vector. Certainty always returns a number between 0 and 1, with 1 indicating identical vectors and 0 indicating opposing angles. + + Content: Purchase order and note + Metadata: + _additional: {{'certainty': 0.8744078576564789, 'id': '29389b8d-a85d-46d1-9a6d-a738c6f81c88'}} + alternate_title: None + collection: Berkeley Folk Music Festival + contributor: ['University of California, Berkeley. Associated Students', 'Berkeley Folk Music Festival'] + creator: None + date_created: ['October 7, 1970', '1970?'] + description: ['Purchase order for costs related to security for the 1970 Berkeley Folk Music Festival and a handwritten note containing calculations and the heading "Police"'] + genre: ['notes (documents)', 'purchase orders'] + language: ['English'] + library_unit: Charles Deering McCormick Library of Special Collections + location: None + physical_description_material: None + physical_description_size: ['5 inches (height) x 3 inches (width)', '7 inches (height) x 8.5 inches (width)'] + published: True + rights_statement: In Copyright + scope_and_contents: None + series: ['Berkeley Folk Music Festival Archive--3. Festivals: Records, Budgets, Publicity'] + source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88 + style_period: None + subject: ['Berkeley Folk Music Festival (15th : 1970 : Berkeley, Calif.)'] + table_of_contents: None + technique: None + visibility: Public + work_type: Image + Source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88 + + Content: Berkeley Folk Music Festival, 1966 June 26-30 + Metadata: + _additional: {{'certainty': 0.869585394859314, 'id': '477e3f63-fc06-4bfc-8734-0b6100c0d1c3'}} + alternate_title: None + collection: Berkeley Folk Music Festival + contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] + creator: None + date_created: ['1966'] + description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.'] + genre: ['posters'] + language: ['English'] + library_unit: Charles Deering McCormick Library of Special Collections + location: None + physical_description_material: None + physical_description_size: ['12.75 inches (height) x 12.75 inches (width)'] + published: True + rights_statement: In Copyright + scope_and_contents: None + series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters'] + source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3 + style_period: None + subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Hawes, Bess Lomax, 1921-2009'] + table_of_contents: None + technique: None + visibility: Public + work_type: Image + Source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3 + + Content: Berkeley Folk Music Festival, 1966 June 26-30 + Metadata: + _additional: {{'certainty': 0.8694239258766174, 'id': 'bddeb375-762b-45e3-9e4e-5a4084ac5955'}} + alternate_title: None + collection: Berkeley Folk Music Festival + contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] + creator: None + date_created: ['1966'] + description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.'] + genre: ['posters'] + language: ['English'] + library_unit: Charles Deering McCormick Library of Special Collections + location: None + physical_description_material: None + physical_description_size: ['13.75 inches (height) x 21.75 inches (width)'] + published: True + rights_statement: In Copyright + scope_and_contents: None + series: ['Berkeley Folk Music Festival Archive--9. Posters of Berkeley Folk Music Festivals'] + source: bddeb375-762b-45e3-9e4e-5a4084ac5955 + style_period: None + subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival'] + table_of_contents: None + technique: None + visibility: Public + work_type: Image + Source: bddeb375-762b-45e3-9e4e-5a4084ac5955 + + Content: Berkeley Folk Music Festival, 1966 June 30-July 4 + Metadata: + _additional: {{'certainty': 0.8693937957286835, 'id': 'aab0bb76-ab02-429a-843a-5be56e31ba67'}} + alternate_title: None + collection: Berkeley Folk Music Festival + contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] + creator: None + date_created: ['1966'] + description: ['Poster for the 9th Annual Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger. Originally found in box 28, folder 3.'] + genre: ['posters'] + language: ['English'] + library_unit: Charles Deering McCormick Library of Special Collections + location: None + physical_description_material: None + physical_description_size: ['24.25 inches (height) x 37.5 inches (width)'] + published: True + rights_statement: In Copyright + scope_and_contents: None + series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters'] + source: aab0bb76-ab02-429a-843a-5be56e31ba67 + style_period: None + subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival'] + table_of_contents: None + technique: None + visibility: Public + work_type: Image + Source: aab0bb76-ab02-429a-843a-5be56e31ba67 + + QUESTION: Which musicians played at the Berkeley Folk Music Festival? + HELPFUL ANSWER: For the 1966 Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, the following musicians and groups were listed as performers: + + Pete Seeger + Jefferson Airplane + Sam Hinton + Greenbriar Boys + Shlomo Carlebach + John Fahey + Los Halcones de Salitrillos + Charley Marshall + Phil Ochs + Ralph J. Gleason + Malvina Reynolds + Robert Pete Williams + Alice Stuart Thomas + Bess Lomax Hawes + Charles Seeger + + Unfortunately, the documents provided do not include information about musicians who performed at the Berkeley Folk Music Festival in other years during the 1960s or 1970s. Therefore, I can only confirm the musicians for the 1966 festival. + + {context} + + QUESTION: {question} + ========= + HELPFUL ANSWER:""" + +def document_template(attributes): + lines = (["Content: {page_content}", "Metadata:"] + + [f" {attribute}: {{{attribute}}}" for attribute in attributes] + + ["Source: {source}"]) + return "\n".join(lines) diff --git a/chat/src/requirements.txt b/chat/src/requirements.txt new file mode 100644 index 00000000..aa6d612d --- /dev/null +++ b/chat/src/requirements.txt @@ -0,0 +1,14 @@ +# Runtime Dependencies +langchain~=0.0.208 +nbformat~=5.9.0 +openai~=0.27.8 +pandas~=2.0.2 +pyjwt~=2.6.0 +python-dotenv~=1.0.0 +tiktoken~=0.4.0 +weaviate-client~=3.19.2 +wheel~=0.40.0 + +# Dev/Test Dependencies +ruff~=0.1.0 +coverage~=7.3.2 diff --git a/python/src/setup.py b/chat/src/setup.py similarity index 73% rename from python/src/setup.py rename to chat/src/setup.py index d9cb9ad6..184b856b 100644 --- a/python/src/setup.py +++ b/chat/src/setup.py @@ -2,18 +2,20 @@ from langchain.vectorstores import Weaviate from typing import List import os -import jwt import weaviate -def openai_chat_client(): +def openai_chat_client(**kwargs): deployment = os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_ID") key = os.getenv("AZURE_OPENAI_API_KEY") resource = os.getenv("AZURE_OPENAI_RESOURCE_NAME") + version = "2023-07-01-preview" return AzureChatOpenAI(deployment_name=deployment, openai_api_key=key, openai_api_base=f"https://{resource}.openai.azure.com/", - openai_api_version="2023-03-15-preview") + openai_api_version=version, + **kwargs) + def weaviate_vector_store(index_name: str, text_key: str, attributes: List[str] = []): @@ -31,14 +33,3 @@ def weaviate_vector_store(index_name: str, text_key: str, attributes: List[str] index_name=index_name, text_key=text_key, attributes=attributes) - - -def validate_token(token): - secret = os.getenv("API_TOKEN_SECRET") - try: - claim = jwt.decode(token, secret, algorithms=["HS256"]) - print(f"CLAIM: {claim}") - return claim.get("isLoggedIn", False) - except Exception as e: - print(e) - return False \ No newline at end of file diff --git a/chat/template.yaml b/chat/template.yaml new file mode 100644 index 00000000..303ec226 --- /dev/null +++ b/chat/template.yaml @@ -0,0 +1,237 @@ +AWSTemplateFormatVersion: "2010-09-09" +Transform: AWS::Serverless-2016-10-31 +Description: Websocket Chat API for dc-api-v2 +Parameters: + ApiTokenSecret: + Type: String + Description: Secret Key for Encrypting JWTs (must match IIIF server) + AzureOpenaiApiKey: + Type: String + Description: Azure OpenAI API Key + AzureOpenaiEmbeddingDeploymentId: + Type: String + Description: Azure OpenAI Embedding Deployment ID + AzureOpenaiLlmDeploymentId: + Type: String + Description: Azure OpenAI LLM Deployment ID + AzureOpenaiResourceName: + Type: String + Description: Azure OpenAI Resource Name + WeaviateApiKey: + Type: String + Description: Weaviate API Key + WeaviateUrl: + Type: String + Description: Weaviate URL +Resources: + ApiGwAccountConfig: + Type: "AWS::ApiGateway::Account" + Properties: + CloudWatchRoleArn: !GetAtt "ApiGatewayLoggingRole.Arn" + ApiGatewayLoggingRole: + Type: "AWS::IAM::Role" + Properties: + AssumeRolePolicyDocument: + Version: "2012-10-17" + Statement: + - Effect: Allow + Principal: + Service: + - "apigateway.amazonaws.com" + Action: "sts:AssumeRole" + Path: "/" + ManagedPolicyArns: + - !Sub "arn:${AWS::Partition}:iam::aws:policy/service-role/AmazonAPIGatewayPushToCloudWatchLogs" + ChatWebSocket: + Type: AWS::ApiGatewayV2::Api + Properties: + Name: ChatWebSocket + ProtocolType: WEBSOCKET + RouteSelectionExpression: "$request.body.message" + ConnectRoute: + Type: AWS::ApiGatewayV2::Route + Properties: + ApiId: !Ref ChatWebSocket + RouteKey: $connect + RouteResponseSelectionExpression: '$default' + AuthorizationType: NONE + ApiKeyRequired: false + OperationName: ConnectRoute + Target: !Sub 'integrations/${ConnectInteg}' + ConnectInteg: + Type: AWS::ApiGatewayV2::Integration + Properties: + ApiId: !Ref ChatWebSocket + Description: Connect Integration + IntegrationType: MOCK + RequestTemplates: + "200" : '{"statusCode" : 200}' + TemplateSelectionExpression: '200' + PassthroughBehavior: 'WHEN_NO_MATCH' + ConnectIntegResp: + Type: AWS::ApiGatewayV2::IntegrationResponse + Properties: + ApiId: !Ref ChatWebSocket + IntegrationId: !Ref ConnectInteg + IntegrationResponseKey: '$default' + ResponseTemplates: + "200" : '{"statusCode" : 200}' + ConnectRouteResponse: + Type: AWS::ApiGatewayV2::RouteResponse + Properties: + RouteId: !Ref ConnectRoute + ApiId: !Ref ChatWebSocket + RouteResponseKey: $default + DisconnectRoute: + Type: AWS::ApiGatewayV2::Route + Properties: + ApiId: !Ref ChatWebSocket + RouteKey: $disconnect + RouteResponseSelectionExpression: '$default' + AuthorizationType: NONE + OperationName: DisconnectRoute + Target: !Sub 'integrations/${DisconnectInteg}' + DisconnectInteg: + Type: AWS::ApiGatewayV2::Integration + Properties: + ApiId: !Ref ChatWebSocket + Description: Disconnect Integration + IntegrationType: MOCK + RequestTemplates: + "200" : '{"statusCode" : 200}' + TemplateSelectionExpression: '200' + PassthroughBehavior: 'WHEN_NO_MATCH' + DisconnectIntegResp: + Type: AWS::ApiGatewayV2::IntegrationResponse + Properties: + ApiId: !Ref ChatWebSocket + IntegrationId: !Ref DisconnectInteg + IntegrationResponseKey: '$default' + ResponseTemplates: + "200" : '{"statusCode" : 200}' + DisconnectRouteResponse: + Type: AWS::ApiGatewayV2::RouteResponse + Properties: + RouteId: !Ref DisconnectRoute + ApiId: !Ref ChatWebSocket + RouteResponseKey: $default + DefaultRoute: + Type: AWS::ApiGatewayV2::Route + Properties: + ApiId: !Ref ChatWebSocket + RouteKey: $default + RouteResponseSelectionExpression: '$default' + AuthorizationType: NONE + OperationName: DefaultRoute + Target: !Sub 'integrations/${DefaultInteg}' + DefaultInteg: + Type: AWS::ApiGatewayV2::Integration + Properties: + ApiId: !Ref ChatWebSocket + Description: Default Integration + IntegrationType: MOCK + RequestTemplates: + "200" : '{"statusCode" : 200}' + TemplateSelectionExpression: '200' + DefaultIntegResp: + Type: AWS::ApiGatewayV2::IntegrationResponse + Properties: + ApiId: !Ref ChatWebSocket + IntegrationId: !Ref DefaultInteg + IntegrationResponseKey: $default + ResponseTemplates: + "200" : '{"statusCode" : 200, "connectionId" : "$context.connectionId"}' + TemplateSelectionExpression: '200' + DefaultRouteResponse: + Type: AWS::ApiGatewayV2::RouteResponse + Properties: + RouteId: !Ref DefaultRoute + ApiId: !Ref ChatWebSocket + RouteResponseKey: $default + ChatRoute: + Type: AWS::ApiGatewayV2::Route + Properties: + ApiId: !Ref ChatWebSocket + RouteKey: chat + AuthorizationType: NONE + OperationName: ChatRoute + Target: !Sub 'integrations/${ChatIntegration}' + ChatIntegration: + Type: AWS::ApiGatewayV2::Integration + Properties: + ApiId: !Ref ChatWebSocket + Description: Chat Integration + IntegrationType: AWS_PROXY + IntegrationUri: !Sub "arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/${ChatFunction.Arn}/invocations" + ChatPermission: + Type: AWS::Lambda::Permission + DependsOn: + - ChatWebSocket + Properties: + Action: lambda:InvokeFunction + FunctionName: !Ref ChatFunction + Principal: apigateway.amazonaws.com + ChatDependencies: + Type: AWS::Serverless::LayerVersion + Properties: + LayerName: + Fn::Sub: "${AWS::StackName}-dependencies" + Description: Dependencies for streaming chat function + ContentUri: ./dependencies + CompatibleRuntimes: + - python3.10 + LicenseInfo: "Apache-2.0" + Metadata: + BuildMethod: python3.10 + ChatFunction: + Type: AWS::Serverless::Function + Properties: + CodeUri: ./src + Runtime: python3.10 + Architectures: + - x86_64 + Layers: + - !Ref ChatDependencies + MemorySize: 128 + Handler: handlers/chat.handler + Timeout: 300 + Environment: + Variables: + API_TOKEN_SECRET: !Ref ApiTokenSecret + AZURE_OPENAI_API_KEY: !Ref AzureOpenaiApiKey + AZURE_OPENAI_EMBEDDING_DEPLOYMENT_ID: !Ref AzureOpenaiEmbeddingDeploymentId + AZURE_OPENAI_LLM_DEPLOYMENT_ID: !Ref AzureOpenaiLlmDeploymentId + AZURE_OPENAI_RESOURCE_NAME: !Ref AzureOpenaiResourceName + WEAVIATE_API_KEY: !Ref WeaviateApiKey + WEAVIATE_URL: !Ref WeaviateUrl + Policies: + - Statement: + - Effect: Allow + Action: + - 'execute-api:ManageConnections' + Resource: + - !Sub 'arn:aws:execute-api:${AWS::Region}:${AWS::AccountId}:${ChatWebSocket}/*' + Metadata: + BuildMethod: nodejs18.x + Deployment: + Type: AWS::ApiGatewayV2::Deployment + DependsOn: + - ConnectRoute + - DisconnectRoute + - DefaultRoute + - ChatRoute + Properties: + ApiId: !Ref ChatWebSocket + Stage: + Type: AWS::ApiGatewayV2::Stage + Properties: + StageName: latest + DeploymentId: !Ref Deployment + ApiId: !Ref ChatWebSocket + DefaultRouteSettings: + DetailedMetricsEnabled: true + LoggingLevel: INFO +Outputs: + WebSocketURI: + Description: "The WSS Protocol URI to connect to" + Value: !Sub 'wss://${ChatWebSocket}.execute-api.${AWS::Region}.amazonaws.com/${Stage}' diff --git a/python/test/__init__.py b/chat/test/__init__.py similarity index 100% rename from python/test/__init__.py rename to chat/test/__init__.py diff --git a/chat/test/fixtures/apitoken.py b/chat/test/fixtures/apitoken.py new file mode 100644 index 00000000..0f61693c --- /dev/null +++ b/chat/test/fixtures/apitoken.py @@ -0,0 +1,5 @@ +TEST_SECRET = "TEST_SECRET" +TEST_TOKEN_NAME = "dcTestToken" +TEST_TOKEN = ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ4NDM1ODY2MDYxNjUs' + 'ImlhdCI6MTY4Nzg5MTM2OSwiZW50aXRsZW1lbnRzIjpbXSwiaXNMb2dnZWRJbiI6d' + 'HJ1ZSwic3ViIjoidGVzdFVzZXIifQ.vIZag1pHE1YyrxsKKlakXX_44ckAvkg7xWOoA_w4x58') diff --git a/chat/test/fixtures/events.py b/chat/test/fixtures/events.py new file mode 100644 index 00000000..1b275be7 --- /dev/null +++ b/chat/test/fixtures/events.py @@ -0,0 +1,56 @@ +from copy import deepcopy +from test.fixtures.apitoken import TEST_TOKEN_NAME, TEST_TOKEN + +POST_EVENT = { + "version": "2.0", + "routeKey": "$default", + "rawPath": "/chat", + "cookies": [ + "cookie_1=cookie_value_1", + "cookie_2=cookie_value_2", + ], + "headers": { + "Authorization": f"Bearer {TEST_TOKEN}", + "origin": "https://example.edu" + }, + "queryStringParameters": { + "param1": "value1", + "param2": "value2", + }, + "requestContext": { + "accountId": "123456789012", + "apiId": "api-id", + "domainName": "id.execute-api.us-east-1.amazonaws.com", + "domainPrefix": "id", + "http": { + "method": "POST", + "path": "/chat", + "protocol": "HTTP/1.1", + "sourceIp": "192.168.0.1/32", + "userAgent": "agent" + }, + "requestId": "id", + "routeKey": "$default", + "stage": "$default", + "time": "12/Mar/2020:19:03:58 +0000", + "timeEpoch": 1583348638390 + }, + "body": "UE9TVGVkIENvbnRlbnQ=", + "pathParameters": {}, + "isBase64Encoded": True, + "stageVariables": {} +} + +PLAIN_BODY_EVENT = deepcopy(POST_EVENT) +PLAIN_BODY_EVENT["isBase64Encoded"] = False +PLAIN_BODY_EVENT["body"] = "POSTed Content" + +NO_BODY_EVENT = deepcopy(POST_EVENT) +NO_BODY_EVENT["isBase64Encoded"] = False +NO_BODY_EVENT["body"] = "" + +NO_TOKEN_EVENT = deepcopy(POST_EVENT) +del NO_TOKEN_EVENT["headers"]["Authorization"] + +COOKIE_TOKEN_EVENT = deepcopy(NO_TOKEN_EVENT) +COOKIE_TOKEN_EVENT["cookies"].append(f"{TEST_TOKEN_NAME}={TEST_TOKEN}") diff --git a/python/test/handlers/__init__.py b/chat/test/handlers/__init__.py similarity index 100% rename from python/test/handlers/__init__.py rename to chat/test/handlers/__init__.py diff --git a/chat/test/helpers/__init__.py b/chat/test/helpers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/chat/test/helpers/test_apitoken.py b/chat/test/helpers/test_apitoken.py new file mode 100644 index 00000000..ce6b6ae0 --- /dev/null +++ b/chat/test/helpers/test_apitoken.py @@ -0,0 +1,24 @@ +import os +from src.helpers.apitoken import ApiToken +from test.fixtures.apitoken import TEST_SECRET, TEST_TOKEN +from unittest import mock, TestCase + +@mock.patch.dict( + os.environ, + { + "API_TOKEN_SECRET": TEST_SECRET + } +) +class TestFunction(TestCase): + def test_empty_token(self): + subject = ApiToken() + self.assertFalse(subject.is_logged_in()) + + def test_valid_token(self): + subject = ApiToken(TEST_TOKEN) + self.assertTrue(subject.is_logged_in()) + + def test_invalid_token(self): + subject = ApiToken("INVALID_TOKEN") + self.assertFalse(subject.is_logged_in()) + \ No newline at end of file diff --git a/node/package.json b/node/package.json index 8e15f511..a4d9f3ef 100644 --- a/node/package.json +++ b/node/package.json @@ -10,7 +10,7 @@ }, "scripts": { "lint": "eslint src/**/*.js test/**/*.js", - "preinstall": "cd src && npm i && cd ../lambdas && npm i && cd ../", + "preinstall": "cd src && npm i && cd - && cd ../lambdas && npm i && cd -", "prettier": "prettier -c src test", "prettier:fix": "prettier -cw src test", "test": "mocha", diff --git a/node/src/handlers/get-chat-endpoint.js b/node/src/handlers/get-chat-endpoint.js new file mode 100644 index 00000000..c40ee77b --- /dev/null +++ b/node/src/handlers/get-chat-endpoint.js @@ -0,0 +1,21 @@ +const { wrap } = require("./middleware"); + +const handler = wrap(async (event) => { + if (!event.userToken.isLoggedIn()) { + return { + statusCode: 401, + headers: { "Content-Type": "text/plain" }, + body: "Authorization Required", + }; + } + + return { + statusCode: 200, + body: JSON.stringify({ + endpoint: process.env.WEBSOCKET_URI, + auth: event.userToken.sign(), + }), + }; +}); + +module.exports = { handler }; diff --git a/node/test/integration/get-chat-endpoint.test.js b/node/test/integration/get-chat-endpoint.test.js new file mode 100644 index 00000000..cc9f9496 --- /dev/null +++ b/node/test/integration/get-chat-endpoint.test.js @@ -0,0 +1,31 @@ +"use strict"; + +const chai = require("chai"); +const expect = chai.expect; + +const getChatEndpointHandler = requireSource("handlers/get-chat-endpoint"); +const ApiToken = requireSource("api/api-token"); + +describe("GET /chat-endpoint", function () { + helpers.saveEnvironment(); + + it("returns the websocket URI and token to a logged in user", async () => { + const token = new ApiToken().user({ uid: "abc123" }).sign(); + + const event = helpers + .mockEvent("GET", "/chat-endpoint") + .headers({ + Authorization: `Bearer ${token}`, + }) + .render(); + + const result = await getChatEndpointHandler.handler(event); + + expect(result.statusCode).to.eq(200); + const response = JSON.parse(result.body); + expect(response).to.contain({ + endpoint: "wss://thisisafakewebsocketapiurl", + auth: token, + }); + }); +}); diff --git a/node/test/integration/oai.test.js b/node/test/integration/oai.test.js index 82e26345..ae1051ec 100644 --- a/node/test/integration/oai.test.js +++ b/node/test/integration/oai.test.js @@ -398,7 +398,6 @@ describe("Oai routes", () => { expect(result.statusCode).to.eq(200); expect(result).to.have.header("content-type", /application\/xml/); const resultBody = convert.xml2js(result.body, xmlOpts); - console.log(resultBody["OAI-PMH"].ListIdentifiers.header); expect(resultBody["OAI-PMH"].ListIdentifiers.header) .to.be.an("object") .to.have.keys(["identifier", "datestamp", "setSpec"]); diff --git a/node/test/test-helpers/index.js b/node/test/test-helpers/index.js index a90397fb..8045b1ed 100644 --- a/node/test/test-helpers/index.js +++ b/node/test/test-helpers/index.js @@ -13,6 +13,7 @@ const TestEnvironment = { DC_API_ENDPOINT: "https://thisisafakeapiurl", NUSSO_BASE_URL: "https://nusso-base.com/", NUSSO_API_KEY: "abc123", + WEBSOCKET_URI: "wss://thisisafakewebsocketapiurl", }; for (const v in TestEnvironment) delete process.env[v]; diff --git a/python/requirements.txt b/python/requirements.txt deleted file mode 100644 index 074747e7..00000000 --- a/python/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ --r src/requirements.txt -coverage -ruff \ No newline at end of file diff --git a/python/src/handlers/chat.py b/python/src/handlers/chat.py deleted file mode 100644 index 8ba0ae69..00000000 --- a/python/src/handlers/chat.py +++ /dev/null @@ -1,85 +0,0 @@ -# ruff: noqa: E501 -import base64 -import json -import os -import setup -from langchain.chains import RetrievalQAWithSourcesChain - -def handler(event, context): - if not is_authenticated(event): - return { - "statusCode": 401, - "headers": { - "Content-Type": "text/plain" - }, - "body": "Unauthorized" - } - question = get_query(event) - index_name = get_param(event, "index", "Work") - text_key = get_param(event, "text_key", "title") - attributes = get_param(event, - "attributes", - "identifier,title,source,alternate_title,contributor,create_date,creator,date_created,description,genre,keywords,language,location,physical_description_material,physical_description_size,scope_and_contents,style_period,subject,table_of_contents,technique,work_type").split(",") - - weaviate = setup.weaviate_vector_store(index_name=index_name, - text_key=text_key, - attributes=attributes) - - client = setup.openai_chat_client() - - - chain = RetrievalQAWithSourcesChain.from_chain_type( - client, - chain_type="stuff", - retriever=weaviate.as_retriever(search_kwargs=dict(additional="certainty")), - return_source_documents=True) - - response = chain({"question": question}) - print(response) - response['source_documents'] = [doc.__dict__ for doc in response['source_documents']] - return { - "statusCode": 200, - "headers": { - "Content-Type": "application/json", - "access-control-allow-methods": "POST, GET", - "access-control-allow-credentials": True, - "access-control-max-age": 600, - "access-control-allow-origin": get_header(event, "Origin", "*"), - "access-control-allow-headers": "Accept, Accept-Charset, Accept-Encoding, Accept-Language, Accept-Datetime, Authorization, Cache-Control, Content-Length, Content-Type, Cookie, Date, Expect, Host, If-Match, If-Modified-Since, If-None-Match, If-Range, If-Unmodified-Since, Origin, Pragma, Range, Referer, User-Agent, X-CSRF-Token, X-Forwarded-For, X-Forwarded-Host, X-Forwarded-Port, X-Requested-With" - }, - "body": json.dumps(response) - } - -def get_header(event, header, default=None): - headers = event.get("headers") - return headers.get(header, headers.get(header.lower(), default)) - -def get_param(event, parameter, default): - params = event.get("queryStringParameters", {}) - return params.get(parameter, default) - - -def get_query(event): - question = event.get("body", "") - if event.get("isBase64Encoded", False): - question = base64.b64decode(question) - return question - - -def is_authenticated(event): - token = get_header(event, "Authorization") - - if token is None: - for cookie in event.get("cookies", []): - [k, v] = cookie.split("=", 1) - if k == os.getenv("API_TOKEN_NAME"): - token = v - else: - token = token.replace("Bearer ", "") - - return setup.validate_token(token) - - - -# result = weaviate.similarity_search_by_text(query=question, -# additional="certainty") \ No newline at end of file diff --git a/python/src/handlers/hello.py b/python/src/handlers/hello.py deleted file mode 100644 index 5d4e5bd3..00000000 --- a/python/src/handlers/hello.py +++ /dev/null @@ -1,12 +0,0 @@ -import os - -def lambda_handler(event, context): - params = event.get("queryStringParameters", {}) - name = params.get("name", os.getenv("DEFAULT_NAME", "No One")) - return { - "statusCode": 200, - "headers": { - "Content-Type": "text/plain" - }, - "body": f"Hello, {name}" - } \ No newline at end of file diff --git a/python/test/handlers/test_hello.py b/python/test/handlers/test_hello.py deleted file mode 100644 index b94b1c11..00000000 --- a/python/test/handlers/test_hello.py +++ /dev/null @@ -1,9 +0,0 @@ -import unittest -from src.handlers import hello - -class TestFunction(unittest.TestCase): - def test_function(self): - event = {'queryStringParameters': {'name': 'Joe'}} - context = {'requestid' : '1234'} - result = hello.lambda_handler(event, context) - self.assertEqual(result['body'], 'Hello, Joe') diff --git a/template.yaml b/template.yaml index 8dccdd16..2c1ab611 100644 --- a/template.yaml +++ b/template.yaml @@ -100,6 +100,9 @@ Parameters: NussoBaseUrl: Type: String Description: Auth server URL + PrototypeUrl: + Type: String + Description: URL of prototype ReadingRoomIPs: Type: String Description: Comma-delimited list of IP addresses to serve private resources to @@ -649,44 +652,33 @@ Resources: ApiId: !Ref dcApi Path: /oai Method: POST - helloWorldFunction: + chatWebsocket: + Type: AWS::Serverless::Application + Properties: + Location: ./chat/template.yaml + Parameters: + ApiTokenSecret: !Ref ApiTokenSecret + AzureOpenaiApiKey: !Ref AzureOpenaiApiKey + AzureOpenaiEmbeddingDeploymentId: !Ref AzureOpenaiEmbeddingDeploymentId + AzureOpenaiLlmDeploymentId: !Ref AzureOpenaiLlmDeploymentId + AzureOpenaiResourceName: !Ref AzureOpenaiResourceName + WeaviateApiKey: !Ref WeaviateApiKey + WeaviateUrl: !Ref WeaviateUrl + chatWebsocketEndpoint: Type: AWS::Serverless::Function Properties: - CodeUri: ./python/src - Runtime: python3.9 - Handler: handlers/hello.lambda_handler + Handler: handlers/get-chat-endpoint.handler + Description: Returns the URI of the chat websocket API. Environment: Variables: - DEFAULT_NAME: "World" + WEBSOCKET_URI: !GetAtt chatWebsocket.Outputs.WebSocketURI Events: GetApiGet: Type: HttpApi Properties: ApiId: !Ref dcApi - Path: /hello + Path: /chat-endpoint Method: GET - chatFunction: - Type: AWS::Serverless::Function - Properties: - CodeUri: ./python/src - Runtime: python3.9 - Handler: handlers/chat.handler - Timeout: 300 - Environment: - Variables: - AZURE_OPENAI_API_KEY: !Ref AzureOpenaiApiKey - AZURE_OPENAI_EMBEDDING_DEPLOYMENT_ID: !Ref AzureOpenaiEmbeddingDeploymentId - AZURE_OPENAI_LLM_DEPLOYMENT_ID: !Ref AzureOpenaiLlmDeploymentId - AZURE_OPENAI_RESOURCE_NAME: !Ref AzureOpenaiResourceName - WEAVIATE_API_KEY: !Ref WeaviateApiKey - WEAVIATE_URL: !Ref WeaviateUrl - Events: - PostApi: - Type: HttpApi - Properties: - ApiId: !Ref dcApi - Path: /chat - Method: POST defaultFunction: Type: AWS::Serverless::Function Properties: From b1246b6138965f73e4e77ed51d1857be51ae06d9 Mon Sep 17 00:00:00 2001 From: "Michael B. Klein" Date: Tue, 17 Oct 2023 19:47:38 +0000 Subject: [PATCH 5/6] Allow superuser to override prompt and attributes Allow any request to override index name and k value Make sure chatWebsocketEndpoint uses the API dependency layer Fix up attribute filtering Adds debug mode to chat handler Simplify the prompt template, now that we can override it in development easily Remove full_text from LLM prompt Bump chatFunction memory to 1GB Allow for overriding parameters to the LLM with default configuration in place - Large refactor of configuration handling, adds the ability to override many more parameters via websocket messages - Tests passing in dev environment using the Makefile and Github actions - Allow for skipping weaviate setup in Github actions via environment variable Temporarily removes full_text from vector searches --- .github/workflows/deploy.yml | 2 +- .github/workflows/test-python.yml | 1 + .gitignore | 1 + Makefile | 120 +++++----- chat/dependencies/requirements.txt | 3 +- chat/src/event_config.py | 216 ++++++++++++++++++ chat/src/handlers/__init__.py | 0 chat/src/handlers/chat.py | 140 +++--------- .../streaming_socket_callback_handler.py | 11 + chat/src/helpers/__init__.py | 0 chat/src/helpers/apitoken.py | 51 +++-- chat/src/helpers/metrics.py | 18 ++ chat/src/helpers/prompts.py | 164 ++----------- chat/src/helpers/response.py | 56 +++++ chat/src/helpers/utils.py | 7 + chat/src/requirements.txt | 3 +- chat/src/setup.py | 72 ++++-- chat/src/websocket.py | 16 ++ chat/template.yaml | 2 +- chat/test/fixtures/apitoken.py | 3 + chat/test/handlers/test_chat.py | 74 ++++++ .../test_streaming_socket_callback_handler.py | 26 +++ chat/test/helpers/test_apitoken.py | 60 +++-- chat/test/helpers/test_metrics.py | 64 ++++++ chat/test/helpers/test_prompts.py | 33 +++ chat/test/test_event_config.py | 112 +++++++++ chat/test/test_websocket.py | 18 ++ template.yaml | 2 + 28 files changed, 884 insertions(+), 391 deletions(-) create mode 100644 chat/src/event_config.py create mode 100644 chat/src/handlers/__init__.py create mode 100644 chat/src/handlers/streaming_socket_callback_handler.py create mode 100644 chat/src/helpers/__init__.py create mode 100644 chat/src/helpers/metrics.py create mode 100644 chat/src/helpers/response.py create mode 100644 chat/src/helpers/utils.py create mode 100644 chat/src/websocket.py create mode 100644 chat/test/handlers/test_chat.py create mode 100644 chat/test/handlers/test_streaming_socket_callback_handler.py create mode 100644 chat/test/helpers/test_metrics.py create mode 100644 chat/test/helpers/test_prompts.py create mode 100644 chat/test/test_event_config.py create mode 100644 chat/test/test_websocket.py diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 27a4a28e..6699aa8d 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -7,7 +7,7 @@ on: paths: - ".github/workflows/deploy.yml" - "node/**" - - "python/**" + - "chat/**" - "template.yaml" workflow_dispatch: concurrency: diff --git a/.github/workflows/test-python.yml b/.github/workflows/test-python.yml index ff995d1c..c1f3e46d 100644 --- a/.github/workflows/test-python.yml +++ b/.github/workflows/test-python.yml @@ -14,6 +14,7 @@ jobs: env: AWS_ACCESS_KEY_ID: ci AWS_SECRET_ACCESS_KEY: ci + SKIP_WEAVIATE_SETUP: 'True' steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 diff --git a/.gitignore b/.gitignore index adf3345d..3c9b74f5 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,7 @@ lerna-debug.log* ### Python ### .coverage +htmlcov __pycache__/ *.py[cod] *$py.class diff --git a/Makefile b/Makefile index 41f41972..dded3f4f 100644 --- a/Makefile +++ b/Makefile @@ -1,58 +1,62 @@ -ifndef VERBOSE -.SILENT: -endif -ENV=dev - -help: - echo "make build | build the SAM project" - echo "make serve | run the SAM server locally" - echo "make clean | remove all installed dependencies and build artifacts" - echo "make deps | install all dependencies" - echo "make link | create hard links to allow for hot reloading of a built project" - echo "make secrets | symlink secrets files from ../tfvars" - echo "make style | run all style checks" - echo "make test | run all tests" - echo "make cover | run all tests with coverage" - echo "make env ENV=[env] | activate env.\$$ENV.json file (default: dev)" - echo "make deps-node | install node dependencies" - echo "make deps-python | install python dependencies" - echo "make style-node | run node code style check" - echo "make style-python | run python code style check" - echo "make test-node | run node tests" - echo "make test-python | run python tests" - echo "make cover-node | run node tests with coverage" - echo "make cover-python | run python tests with coverage" -.aws-sam/build.toml: ./template.yaml node/package-lock.json node/src/package-lock.json python/requirements.txt python/src/requirements.txt - sam build --cached --parallel -deps-node: - cd node && npm ci -cover-node: - cd node && npm run test:coverage -style-node: - cd node && npm run prettier -test-node: - cd node && npm run test -deps-python: - cd chat/src && pip install -r requirements.txt -cover-python: - cd chat/src && coverage run --include='src/**/*' -m unittest -v && coverage report -style-python: - cd chat && ruff check . -test-python: - cd chat && python -m unittest -v -build: .aws-sam/build.toml -link: build - cd chat/src && for src in *.py **/*.py; do for target in $$(find ../../.aws-sam/build -maxdepth 1 -type d); do if [[ -f $$target/$$src ]]; then ln -f $$src $$target/$$src; fi; done; done - cd node/src && for src in *.js *.json **/*.js **/*.json; do for target in $$(find ../../.aws-sam/build -maxdepth 1 -type d); do if [[ -f $$target/$$src ]]; then ln -f $$src $$target/$$src; fi; done; done -serve: link - sam local start-api --host 0.0.0.0 --log-file dc-api.log -deps: deps-node deps-python -style: style-node style-python -test: test-node test-python -cover: cover-node cover-python -env: - ln -fs ./env.${ENV}.json ./env.json -secrets: - ln -s ../tfvars/dc-api/* . -clean: - rm -rf .aws-sam node/node_modules node/src/node_modules python/**/__pycache__ python/.coverage python/.ruff_cache +ifndef VERBOSE +.SILENT: +endif +ENV=dev + +help: + echo "make build | build the SAM project" + echo "make serve | run the SAM server locally" + echo "make clean | remove all installed dependencies and build artifacts" + echo "make deps | install all dependencies" + echo "make link | create hard links to allow for hot reloading of a built project" + echo "make secrets | symlink secrets files from ../tfvars" + echo "make style | run all style checks" + echo "make test | run all tests" + echo "make cover | run all tests with coverage" + echo "make env ENV=[env] | activate env.\$$ENV.json file (default: dev)" + echo "make deps-node | install node dependencies" + echo "make deps-python | install python dependencies" + echo "make style-node | run node code style check" + echo "make style-python | run python code style check" + echo "make test-node | run node tests" + echo "make test-python | run python tests" + echo "make cover-node | run node tests with coverage" + echo "make cover-python | run python tests with coverage" +.aws-sam/build.toml: ./template.yaml node/package-lock.json node/src/package-lock.json chat/dependencies/requirements.txt chat/src/requirements.txt + sam build --cached --parallel +deps-node: + cd node && npm ci +cover-node: + cd node && npm run test:coverage +style-node: + cd node && npm run prettier +test-node: + cd node && npm run test +deps-python: + cd chat/src && pip install -r requirements.txt +cover-python: deps-python + cd chat && export SKIP_WEAVIATE_SETUP=True && coverage run --source=src -m unittest -v && coverage report --skip-empty +cover-html-python: deps-python + cd chat && export SKIP_WEAVIATE_SETUP=True && coverage run --source=src -m unittest -v && coverage html --skip-empty +style-python: deps-python + cd chat && ruff check . +test-python: deps-python + cd chat && export SKIP_WEAVIATE_SETUP=True && PYTHONPATH=src:test && python -m unittest discover -v +python-version: + cd chat && python --version +build: .aws-sam/build.toml +link: build + cd chat/src && for src in *.py **/*.py; do for target in $$(find ../../.aws-sam/build -maxdepth 1 -type d); do if [[ -f $$target/$$src ]]; then ln -f $$src $$target/$$src; fi; done; done + cd node/src && for src in *.js *.json **/*.js **/*.json; do for target in $$(find ../../.aws-sam/build -maxdepth 1 -type d); do if [[ -f $$target/$$src ]]; then ln -f $$src $$target/$$src; fi; done; done +serve: link + sam local start-api --host 0.0.0.0 --log-file dc-api.log +deps: deps-node deps-python +style: style-node style-python +test: test-node test-python +cover: cover-node cover-python +env: + ln -fs ./env.${ENV}.json ./env.json +secrets: + ln -s ../tfvars/dc-api/* . +clean: + rm -rf .aws-sam node/node_modules node/src/node_modules python/**/__pycache__ python/.coverage python/.ruff_cache \ No newline at end of file diff --git a/chat/dependencies/requirements.txt b/chat/dependencies/requirements.txt index 68ec56c3..6bee442a 100644 --- a/chat/dependencies/requirements.txt +++ b/chat/dependencies/requirements.txt @@ -1,7 +1,6 @@ +boto3~=1.34.13 langchain~=0.0.208 -nbformat~=5.9.0 openai~=0.27.8 -pandas~=2.0.2 pyjwt~=2.6.0 python-dotenv~=1.0.0 tiktoken~=0.4.0 diff --git a/chat/src/event_config.py b/chat/src/event_config.py new file mode 100644 index 00000000..5c7762b3 --- /dev/null +++ b/chat/src/event_config.py @@ -0,0 +1,216 @@ +import os +import json + +from dataclasses import dataclass, field +from langchain.chains.qa_with_sources import load_qa_with_sources_chain +from langchain.prompts import PromptTemplate +from setup import ( + weaviate_client, + weaviate_vector_store, + openai_chat_client, +) +from typing import List +from handlers.streaming_socket_callback_handler import StreamingSocketCallbackHandler +from helpers.apitoken import ApiToken +from helpers.prompts import document_template, prompt_template +from websocket import Websocket + + +CHAIN_TYPE = "stuff" +DOCUMENT_VARIABLE_NAME = "context" +INDEX_NAME = "DCWork" +K_VALUE = 10 +MAX_K = 100 +TEMPERATURE = 0.2 +TEXT_KEY = "title" +VERSION = "2023-07-01-preview" + + +@dataclass +class EventConfig: + """ + The EventConfig class represents the configuration for an event. + Default values are set for the following properties which can be overridden in the payload message. + """ + + api_token: ApiToken = field(init=False) + attributes: List[str] = field(init=False) + azure_endpoint: str = field(init=False) + azure_resource_name: str = field(init=False) + debug_mode: bool = field(init=False) + deployment_name: str = field(init=False) + document_prompt: PromptTemplate = field(init=False) + event: dict = field(default_factory=dict) + index_name: str = field(init=False) + is_logged_in: bool = field(init=False) + k: int = field(init=False) + openai_api_version: str = field(init=False) + payload: dict = field(default_factory=dict) + prompt_text: str = field(init=False) + prompt: PromptTemplate = field(init=False) + question: str = field(init=False) + ref: str = field(init=False) + request_context: dict = field(init=False) + temperature: float = field(init=False) + socket: Websocket = field(init=False, default=None) + text_key: str = field(init=False) + + def __post_init__(self): + self.payload = json.loads(self.event.get("body", "{}")) + self.api_token = ApiToken(signed_token=self.payload.get("auth")) + self.attributes = self._get_attributes() + self.azure_endpoint = self._get_azure_endpoint() + self.azure_resource_name = self._get_azure_resource_name() + self.azure_endpoint = self._get_azure_endpoint() + self.debug_mode = self._is_debug_mode_enabled() + self.deployment_name = self._get_deployment_name() + self.index_name = self._get_index_name() + self.is_logged_in = self.api_token.is_logged_in() + self.k = self._get_k() + self.openai_api_version = self._get_openai_api_version() + self.prompt_text = self._get_prompt_text() + self.request_context = self.event.get("requestContext", {}) + self.question = self.payload.get("question") + self.ref = self.payload.get("ref") + self.temperature = self._get_temperature() + self.text_key = self._get_text_key() + self.attributes = self._get_attributes() + self.document_prompt = self._get_document_prompt() + self.prompt = PromptTemplate(template=self.prompt_text, input_variables=["question", "context"]) + + def _get_payload_value_with_superuser_check(self, key, default): + if self.api_token.is_superuser(): + return self.payload.get(key, default) + else: + return default + + def _get_azure_endpoint(self): + default = f"https://{self._get_azure_resource_name()}.openai.azure.com/" + return self._get_payload_value_with_superuser_check("azure_endpoint", default) + + def _get_azure_resource_name(self): + azure_resource_name = self._get_payload_value_with_superuser_check("azure_resource_name", os.environ.get("AZURE_OPENAI_RESOURCE_NAME")) + if not azure_resource_name: + raise EnvironmentError( + "Either payload must contain 'azure_resource_name' or environment variable 'AZURE_OPENAI_RESOURCE_NAME' must be set" + ) + return azure_resource_name + + def _get_deployment_name(self): + return self._get_payload_value_with_superuser_check("deployment_name", os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_ID")) + + def _get_index_name(self): + return self._get_payload_value_with_superuser_check("index", INDEX_NAME) + + def _get_k(self): + value = self._get_payload_value_with_superuser_check("k", K_VALUE) + return min(value, MAX_K) + + def _get_openai_api_version(self): + return self._get_payload_value_with_superuser_check("openai_api_version", VERSION) + + def _get_prompt_text(self): + return self._get_payload_value_with_superuser_check("prompt", prompt_template()) + + def _get_temperature(self): + return self._get_payload_value_with_superuser_check("temperature", TEMPERATURE) + + def _get_text_key(self): + return self._get_payload_value_with_superuser_check("text_key", TEXT_KEY) + + def _get_attributes(self): + attributes = [ + item + for item in self._get_request_attributes() + if item not in [self._get_text_key(), "source", "full_text"] + ] + return attributes + + def _get_request_attributes(self): + if os.getenv("SKIP_WEAVIATE_SETUP"): + return [] + + attributes = self._get_payload_value_with_superuser_check("attributes", []) + if attributes: + return attributes + else: + client = weaviate_client() + schema = client.schema.get(self._get_index_name()) + names = [prop["name"] for prop in schema.get("properties")] + return names + + def _get_document_prompt(self): + return PromptTemplate( + template=document_template(self.attributes), + input_variables=["page_content", "source"] + self.attributes, + ) + + def debug_message(self): + return { + "type": "debug", + "message": { + "attributes": self.attributes, + "azure_endpoint": self.azure_endpoint, + "deployment_name": self.deployment_name, + "index": self.index_name, + "k": self.k, + "openai_api_version": self.openai_api_version, + "prompt": self.prompt_text, + "question": self.question, + "ref": self.ref, + "temperature": self.temperature, + "text_key": self.text_key, + }, + } + + def setup_websocket(self, socket=None): + if socket is None: + connection_id = self.request_context.get("connectionId") + endpoint_url = f'https://{self.request_context.get("domainName")}/{self.request_context.get("stage")}' + self.socket = Websocket(endpoint_url=endpoint_url, connection_id=connection_id, ref=self.ref) + else: + self.socket = socket + return self.socket + + def setup_llm_request(self): + self._setup_vector_store() + self._setup_chat_client() + self._setup_chain() + + def _setup_vector_store(self): + self.weaviate = weaviate_vector_store( + index_name=self.index_name, + text_key=self.text_key, + attributes=self.attributes + ["source"], + ) + + def _setup_chat_client(self): + self.client = openai_chat_client( + deployment_name=self.deployment_name, + openai_api_base=self.azure_endpoint, + openai_api_version=self.openai_api_version, + callbacks=[StreamingSocketCallbackHandler(self.socket, self.debug_mode)], + streaming=True, + ) + + def _setup_chain(self): + self.chain = load_qa_with_sources_chain( + self.client, + chain_type=CHAIN_TYPE, + prompt=self.prompt, + document_prompt=self.document_prompt, + document_variable_name=DOCUMENT_VARIABLE_NAME, + verbose=self._to_bool(os.getenv("VERBOSE")), + ) + + def _is_debug_mode_enabled(self): + debug = self.payload.get("debug", False) + return debug and self.api_token.is_superuser() + + def _to_bool(self, val): + """Converts a value to boolean. If the value is a string, it considers + "", "no", "false", "0" as False. Otherwise, it returns the boolean of the value. + """ + if isinstance(val, str): + return val.lower() not in ["", "no", "false", "0"] + return bool(val) diff --git a/chat/src/handlers/__init__.py b/chat/src/handlers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/chat/src/handlers/chat.py b/chat/src/handlers/chat.py index d38a4bf0..aa19ff79 100644 --- a/chat/src/handlers/chat.py +++ b/chat/src/handlers/chat.py @@ -1,117 +1,29 @@ -import boto3 -import json import os -import setup -from helpers.apitoken import ApiToken -from helpers.prompts import document_template, prompt_template -from langchain.callbacks.base import BaseCallbackHandler -from langchain.chains.qa_with_sources import load_qa_with_sources_chain -from langchain.prompts import PromptTemplate -from openai.error import InvalidRequestError +from event_config import EventConfig +from helpers.response import prepare_response -DEFAULT_INDEX = "Work" -DEFAULT_KEY = "title" -DEFAULT_ATTRIBUTES = ("title,alternate_title,collection,contributor,creator," - "date_created,description,genre,language,library_unit," - "location,physical_description_material,physical_description_size," - "published,rights_statement,scope_and_contents,series,source," - "style_period,subject,table_of_contents,technique,visibility," - "work_type") - -class Websocket: - def __init__(self, endpoint_url, connection_id, ref): - self.client = boto3.client('apigatewaymanagementapi', endpoint_url=endpoint_url) - self.connection_id = connection_id - self.ref = ref - - def send(self, data): - data['ref'] = self.ref - data_as_bytes = bytes(json.dumps(data), 'utf-8') - self.client.post_to_connection(Data=data_as_bytes, ConnectionId=self.connection_id) - -class StreamingSocketCallbackHandler(BaseCallbackHandler): - def __init__(self, socket: Websocket): - self.socket = socket - - def on_llm_new_token(self, token: str, **kwargs): - self.socket.send({'token': token}) - -def handler(event, context): - try: - payload = json.loads(event.get('body', '{}')) - - request_context = event.get('requestContext', {}) - connection_id = request_context.get('connectionId') - endpoint_url = f'https://{request_context.get("domainName")}/{request_context.get("stage")}' - ref = payload.get('ref') - socket = Websocket(connection_id=connection_id, endpoint_url=endpoint_url, ref=ref) - - - api_token = ApiToken(signed_token=payload.get("auth")) - if not api_token.is_logged_in(): - socket.send({ "statusCode": 401, "body": "Unauthorized" }) - return { - "statusCode": 401, - "body": "Unauthorized" - } - - question = payload.get("question") - index_name = payload.get("index", DEFAULT_INDEX) - text_key = payload.get("text_key", DEFAULT_KEY) - attributes = [ - item for item - in set(payload.get("attributes", DEFAULT_ATTRIBUTES).split(",")) - if item not in [text_key, "source"] - ] - - weaviate = setup.weaviate_vector_store(index_name=index_name, - text_key=text_key, - attributes=attributes + ["source"]) - - client = setup.openai_chat_client(callbacks=[StreamingSocketCallbackHandler(socket)], streaming=True) - - prompt = PromptTemplate( - template=prompt_template(), - input_variables=["question", "context"] - ) - - document_prompt = PromptTemplate( - template=document_template(attributes), - input_variables=["page_content", "source"] + attributes, - ) - - docs = weaviate.similarity_search(question, k=10, additional="certainty") - chain = load_qa_with_sources_chain( - client, - chain_type="stuff", - prompt=prompt, - document_prompt=document_prompt, - document_variable_name="context", - verbose=to_bool(os.getenv("VERBOSE")) - ) - +def handler(event, _context): try: - doc_response = [doc.__dict__ for doc in docs] - socket.send({"question": question, "source_documents": doc_response}) - response = chain({"question": question, "input_documents": docs}) - response = { - "answer": response["output_text"], - } - socket.send(response) - except InvalidRequestError as err: - response = { - "question": question, - "answer": str(err), - "source_documents": [] - } - socket.send(response) - - return {'statusCode': 200} - except Exception as err: - print(event) - raise err - -def to_bool(val): - if isinstance(val, str): - return val.lower() not in ["", "no", "false", "0"] - return bool(val) + config = EventConfig(event) + socket = event.get('socket', None) + config.setup_websocket(socket) + + if not config.is_logged_in: + config.socket.send({"type": "error", "message": "Unauthorized"}) + return {"statusCode": 401, "body": "Unauthorized"} + + if config.debug_mode: + config.socket.send(config.debug_message()) + + if not os.getenv("SKIP_WEAVIATE_SETUP"): + config.setup_llm_request() + final_response = prepare_response(config) + config.socket.send(final_response) + return {"statusCode": 200} + + except Exception as err: + if err.__class__.__name__ == "PayloadTooLargeException": + config.socket.send({"type": "error", "message": "Payload too large"}) + return {"statusCode": 413, "body": "Payload too large"} + else: + raise err diff --git a/chat/src/handlers/streaming_socket_callback_handler.py b/chat/src/handlers/streaming_socket_callback_handler.py new file mode 100644 index 00000000..5bc1d012 --- /dev/null +++ b/chat/src/handlers/streaming_socket_callback_handler.py @@ -0,0 +1,11 @@ +from langchain.callbacks.base import BaseCallbackHandler +from websocket import Websocket + +class StreamingSocketCallbackHandler(BaseCallbackHandler): + def __init__(self, socket: Websocket, debug_mode: bool): + self.socket = socket + self.debug_mode = debug_mode + + def on_llm_new_token(self, token: str, **kwargs): + if self.socket and not self.debug_mode: + return self.socket.send({"token": token}) diff --git a/chat/src/helpers/__init__.py b/chat/src/helpers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/chat/src/helpers/apitoken.py b/chat/src/helpers/apitoken.py index 4c6ecbd4..46c97263 100644 --- a/chat/src/helpers/apitoken.py +++ b/chat/src/helpers/apitoken.py @@ -2,27 +2,34 @@ import jwt import os + class ApiToken: - @classmethod - def empty_token(cls): - time = int(datetime.now().timestamp()) - return { - 'iss': os.getenv('DC_API_ENDPOINT'), - 'exp': datetime.fromtimestamp(time + 12 * 60 * 60).timestamp(), # 12 hours - 'iat': time, - 'entitlements': [], - 'isLoggedIn': False, - } - - def __init__(self, signed_token=None): - if signed_token is None: - self.token = ApiToken.empty_token() - else: - try: - secret = os.getenv("API_TOKEN_SECRET") - self.token = jwt.decode(signed_token, secret, algorithms=["HS256"]) - except Exception: - self.token = ApiToken.empty_token() + @classmethod + def empty_token(cls): + time = int(datetime.now().timestamp()) + return { + "iss": os.getenv("DC_API_ENDPOINT"), + "exp": datetime.fromtimestamp(time + 12 * 60 * 60).timestamp(), # 12 hours + "iat": time, + "entitlements": [], + "isLoggedIn": False, + } + + def __init__(self, signed_token=None): + if signed_token is None: + self.token = ApiToken.empty_token() + else: + try: + secret = os.getenv("API_TOKEN_SECRET") + self.token = jwt.decode(signed_token, secret, algorithms=["HS256"]) + except Exception: + self.token = ApiToken.empty_token() + + def __str__(self): + return f"ApiToken(token={self.token})" + + def is_logged_in(self): + return self.token.get("isLoggedIn", False) - def is_logged_in(self): - return self.token.get("isLoggedIn", False) + def is_superuser(self): + return self.token.get("isSuperUser", False) diff --git a/chat/src/helpers/metrics.py b/chat/src/helpers/metrics.py new file mode 100644 index 00000000..168cd02f --- /dev/null +++ b/chat/src/helpers/metrics.py @@ -0,0 +1,18 @@ +import tiktoken + + +def token_usage(config, response, original_question): + return { + "question": count_tokens(config.question), + "answer": count_tokens(response["output_text"]), + "prompt": count_tokens(config.prompt_text), + "source_documents": count_tokens(original_question["source_documents"]), + } + + +def count_tokens(val): + encoding = tiktoken.encoding_for_model("gpt-4") + token_integers = encoding.encode(str(val)) + num_tokens = len(token_integers) + + return num_tokens diff --git a/chat/src/helpers/prompts.py b/chat/src/helpers/prompts.py index b79510e8..32ffbc46 100644 --- a/chat/src/helpers/prompts.py +++ b/chat/src/helpers/prompts.py @@ -1,153 +1,23 @@ -# ruff: noqa: E501 -def prompt_template(): - return """Using all of the provided source documents, create a helpful and thorough answer to the supplied question. - If you don't know the answer, just say that you don't know. Don't try to make up an answer, but you should use the documents provided in order to ground your response. - It may be helpful to explain why a provided document does not pertain to the query as well. - Feel free to reference various aspects of the sources in your explanation, but please don't include the full sources in the answer. - The Content field represents the title of each document, and the Metadata fields are the attributes. The Source field is the unique identifier for each document. - 'certainty' is an opinionated measure of the distance between the query vector and the document embedding vector. Certainty always returns a number between 0 and 1, with 1 indicating identical vectors and 0 indicating opposing angles. +from typing import List, Optional - Content: Purchase order and note - Metadata: - _additional: {{'certainty': 0.8744078576564789, 'id': '29389b8d-a85d-46d1-9a6d-a738c6f81c88'}} - alternate_title: None - collection: Berkeley Folk Music Festival - contributor: ['University of California, Berkeley. Associated Students', 'Berkeley Folk Music Festival'] - creator: None - date_created: ['October 7, 1970', '1970?'] - description: ['Purchase order for costs related to security for the 1970 Berkeley Folk Music Festival and a handwritten note containing calculations and the heading "Police"'] - genre: ['notes (documents)', 'purchase orders'] - language: ['English'] - library_unit: Charles Deering McCormick Library of Special Collections - location: None - physical_description_material: None - physical_description_size: ['5 inches (height) x 3 inches (width)', '7 inches (height) x 8.5 inches (width)'] - published: True - rights_statement: In Copyright - scope_and_contents: None - series: ['Berkeley Folk Music Festival Archive--3. Festivals: Records, Budgets, Publicity'] - source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88 - style_period: None - subject: ['Berkeley Folk Music Festival (15th : 1970 : Berkeley, Calif.)'] - table_of_contents: None - technique: None - visibility: Public - work_type: Image - Source: 29389b8d-a85d-46d1-9a6d-a738c6f81c88 - - Content: Berkeley Folk Music Festival, 1966 June 26-30 - Metadata: - _additional: {{'certainty': 0.869585394859314, 'id': '477e3f63-fc06-4bfc-8734-0b6100c0d1c3'}} - alternate_title: None - collection: Berkeley Folk Music Festival - contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] - creator: None - date_created: ['1966'] - description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.'] - genre: ['posters'] - language: ['English'] - library_unit: Charles Deering McCormick Library of Special Collections - location: None - physical_description_material: None - physical_description_size: ['12.75 inches (height) x 12.75 inches (width)'] - published: True - rights_statement: In Copyright - scope_and_contents: None - series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters'] - source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3 - style_period: None - subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Hawes, Bess Lomax, 1921-2009'] - table_of_contents: None - technique: None - visibility: Public - work_type: Image - Source: 477e3f63-fc06-4bfc-8734-0b6100c0d1c3 - Content: Berkeley Folk Music Festival, 1966 June 26-30 - Metadata: - _additional: {{'certainty': 0.8694239258766174, 'id': 'bddeb375-762b-45e3-9e4e-5a4084ac5955'}} - alternate_title: None - collection: Berkeley Folk Music Festival - contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] - creator: None - date_created: ['1966'] - description: ['Poster for the Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger.'] - genre: ['posters'] - language: ['English'] - library_unit: Charles Deering McCormick Library of Special Collections - location: None - physical_description_material: None - physical_description_size: ['13.75 inches (height) x 21.75 inches (width)'] - published: True - rights_statement: In Copyright - scope_and_contents: None - series: ['Berkeley Folk Music Festival Archive--9. Posters of Berkeley Folk Music Festivals'] - source: bddeb375-762b-45e3-9e4e-5a4084ac5955 - style_period: None - subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival'] - table_of_contents: None - technique: None - visibility: Public - work_type: Image - Source: bddeb375-762b-45e3-9e4e-5a4084ac5955 +def prompt_template() -> str: + return """Please answer the question based on the documents provided, and include some details about why the documents might be relevant to the particular question: - Content: Berkeley Folk Music Festival, 1966 June 30-July 4 - Metadata: - _additional: {{'certainty': 0.8693937957286835, 'id': 'aab0bb76-ab02-429a-843a-5be56e31ba67'}} - alternate_title: None - collection: Berkeley Folk Music Festival - contributor: ['Olivier, Barry, 1935-', 'Hart, Kelly, 1943-', 'University of California, Berkeley. Associated Students'] - creator: None - date_created: ['1966'] - description: ['Poster for the 9th Annual Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, 1966, presented by the Associated Students. White text on black background between black and white images of a man playing a fiddle and another man singing into a mic while holding a guitar. Guest list includes Pete Seeger, Jefferson Airplane, Sam Hinton, Greenbriar Boys, Shlomo Carlebach, John Fahey, Los Halcones de Salitrillos, Charley Marshall, Phil Ochs, Ralph J. Gleason, Malvina Reynolds, Robert Pete Williams, Alice Stuart Thomas, Bess Lomax Hawes, and Charles Seeger. Originally found in box 28, folder 3.'] - genre: ['posters'] - language: ['English'] - library_unit: Charles Deering McCormick Library of Special Collections - location: None - physical_description_material: None - physical_description_size: ['24.25 inches (height) x 37.5 inches (width)'] - published: True - rights_statement: In Copyright - scope_and_contents: None - series: ['Berkeley Folk Music Festival Archive--13. Miscellaneous Posters'] - source: aab0bb76-ab02-429a-843a-5be56e31ba67 - style_period: None - subject: ['Berkeley (Calif.)', 'University of California, Berkeley', 'Gleason, Ralph J.', 'Folk music', 'Jefferson Airplane (Musical group)', 'Seeger, Pete, 1919-2014', 'Fahey, John, 1939-2001', 'Williams, Robert Pete, 1914-1980', 'Folk music festivals', 'Hinton, Sam, 1917-2009', 'Reynolds, Malvina', 'Halcones de Salitrillo (Musical group)', 'Folk musicians', 'Concerts', 'Carlebach, Shlomo, 1925-1994', 'Marshall, Charley', 'Ochs, Phil', 'Berkeley Folk Music Festival (9th : 1966 : Berkeley, Calif.)', 'Hawes, Bess Lomax, 1921-2009', 'Greenbriar Boys', 'Stuart, Alice, 1942-', 'Seeger, Charles, 1886-1979', 'Berkeley Folk Music Festival'] - table_of_contents: None - technique: None - visibility: Public - work_type: Image - Source: aab0bb76-ab02-429a-843a-5be56e31ba67 +Documents: +{context} - QUESTION: Which musicians played at the Berkeley Folk Music Festival? - HELPFUL ANSWER: For the 1966 Berkeley Folk Music Festival, held at the University of California, Berkeley from June 30 to July 4, the following musicians and groups were listed as performers: +Question: +{question} +""" - Pete Seeger - Jefferson Airplane - Sam Hinton - Greenbriar Boys - Shlomo Carlebach - John Fahey - Los Halcones de Salitrillos - Charley Marshall - Phil Ochs - Ralph J. Gleason - Malvina Reynolds - Robert Pete Williams - Alice Stuart Thomas - Bess Lomax Hawes - Charles Seeger - Unfortunately, the documents provided do not include information about musicians who performed at the Berkeley Folk Music Festival in other years during the 1960s or 1970s. Therefore, I can only confirm the musicians for the 1966 festival. - - {context} - - QUESTION: {question} - ========= - HELPFUL ANSWER:""" - -def document_template(attributes): - lines = (["Content: {page_content}", "Metadata:"] + - [f" {attribute}: {{{attribute}}}" for attribute in attributes] + - ["Source: {source}"]) - return "\n".join(lines) +def document_template(attributes: Optional[List[str]] = None) -> str: + if attributes is None: + attributes = [] + lines = ( + ["Content: {page_content}", "Metadata:"] + + [f" {attribute}: {{{attribute}}}" for attribute in attributes] + + ["Source: {source}"] + ) + return "\n".join(lines) diff --git a/chat/src/helpers/response.py b/chat/src/helpers/response.py new file mode 100644 index 00000000..42b4e4ed --- /dev/null +++ b/chat/src/helpers/response.py @@ -0,0 +1,56 @@ +from helpers.metrics import token_usage +from openai.error import InvalidRequestError + + +def base_response(config, response): + return {"answer": response["output_text"], "ref": config.ref} + + +def debug_response(config, response, original_question): + response_base = base_response(config, response) + debug_info = { + "attributes": config.attributes, + "azure_endpoint": config.azure_endpoint, + "deployment_name": config.deployment_name, + "index": config.index_name, + "is_superuser": config.api_token.is_superuser(), + "k": config.k, + "openai_api_version": config.openai_api_version, + "prompt": config.prompt_text, + "ref": config.ref, + "temperature": config.temperature, + "text_key": config.text_key, + "token_counts": token_usage(config, response, original_question), + } + return {**response_base, **debug_info} + + +def get_and_send_original_question(config, docs): + doc_response = [doc.__dict__ for doc in docs] + original_question = { + "question": config.question, + "source_documents": doc_response, + } + config.socket.send(original_question) + return original_question + + +def prepare_response(config): + try: + docs = config.weaviate.similarity_search( + config.question, k=config.k, additional="certainty" + ) + original_question = get_and_send_original_question(config, docs) + response = config.chain({"question": config.question, "input_documents": docs}) + + if config.debug_mode: + prepared_response = debug_response(config, response, original_question) + else: + prepared_response = base_response(config, response) + except InvalidRequestError as err: + prepared_response = { + "question": config.question, + "error": str(err), + "source_documents": [], + } + return prepared_response diff --git a/chat/src/helpers/utils.py b/chat/src/helpers/utils.py new file mode 100644 index 00000000..d0d243d4 --- /dev/null +++ b/chat/src/helpers/utils.py @@ -0,0 +1,7 @@ +def to_bool(val): + """Converts a value to boolean. If the value is a string, it considers + "", "no", "false", "0" as False. Otherwise, it returns the boolean of the value. + """ + if isinstance(val, str): + return val.lower() not in ["", "no", "false", "0"] + return bool(val) diff --git a/chat/src/requirements.txt b/chat/src/requirements.txt index aa6d612d..8cb0270e 100644 --- a/chat/src/requirements.txt +++ b/chat/src/requirements.txt @@ -1,8 +1,7 @@ # Runtime Dependencies +boto3~=1.34.13 langchain~=0.0.208 -nbformat~=5.9.0 openai~=0.27.8 -pandas~=2.0.2 pyjwt~=2.6.0 python-dotenv~=1.0.0 tiktoken~=0.4.0 diff --git a/chat/src/setup.py b/chat/src/setup.py index 184b856b..cc70c653 100644 --- a/chat/src/setup.py +++ b/chat/src/setup.py @@ -3,33 +3,57 @@ from typing import List import os import weaviate +import boto3 + def openai_chat_client(**kwargs): - deployment = os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_ID") - key = os.getenv("AZURE_OPENAI_API_KEY") - resource = os.getenv("AZURE_OPENAI_RESOURCE_NAME") - version = "2023-07-01-preview" + return AzureChatOpenAI( + openai_api_key=os.getenv("AZURE_OPENAI_API_KEY"), + **kwargs, + ) + + +def weaviate_client(): + if os.getenv("SKIP_WEAVIATE_SETUP"): + return None + + weaviate_url = os.environ.get("WEAVIATE_URL") + try: + if weaviate_url is None: + raise EnvironmentError( + "WEAVIATE_URL is not set in the environment variables" + ) + + weaviate_api_key = os.environ.get("WEAVIATE_API_KEY") + if weaviate_api_key is None: + raise EnvironmentError( + "WEAVIATE_API_KEY is not set in the environment variables" + ) - return AzureChatOpenAI(deployment_name=deployment, - openai_api_key=key, - openai_api_base=f"https://{resource}.openai.azure.com/", - openai_api_version=version, - **kwargs) - + auth_config = weaviate.AuthApiKey(api_key=weaviate_api_key) + + client = weaviate.Client(url=weaviate_url, auth_client_secret=auth_config) + except Exception as e: + print(f"An error occurred: {e}") + client = None + return client def weaviate_vector_store(index_name: str, text_key: str, attributes: List[str] = []): - weaviate_url = os.environ['WEAVIATE_URL'] - weaviate_api_key = os.environ['WEAVIATE_API_KEY'] - # openai_api_key = os.environ['AZURE_OPENAI_API_KEY'] - - auth_config = weaviate.AuthApiKey(api_key=weaviate_api_key) - - client = weaviate.Client( - url=weaviate_url, - auth_client_secret=auth_config - ) - return Weaviate(client=client, - index_name=index_name, - text_key=text_key, - attributes=attributes) + if os.getenv("SKIP_WEAVIATE_SETUP"): + return None + + client = weaviate_client() + + return Weaviate( + client=client, index_name=index_name, text_key=text_key, attributes=attributes + ) + + +def websocket_client(endpoint_url: str): + endpoint_url = endpoint_url or os.getenv("APIGATEWAY_URL") + try: + client = boto3.client("apigatewaymanagementapi", endpoint_url=endpoint_url) + return client + except Exception as e: + raise e \ No newline at end of file diff --git a/chat/src/websocket.py b/chat/src/websocket.py new file mode 100644 index 00000000..dc81179a --- /dev/null +++ b/chat/src/websocket.py @@ -0,0 +1,16 @@ +import json +from setup import websocket_client + +class Websocket: + def __init__(self, client=None, endpoint_url=None, connection_id=None, ref=None): + self.client = client or websocket_client(endpoint_url) + self.connection_id = connection_id + self.ref = ref if ref else {} + + def send(self, data): + if isinstance(data, str): + data = {"message": data} + data["ref"] = self.ref + data_as_bytes = bytes(json.dumps(data), "utf-8") + self.client.post_to_connection(Data=data_as_bytes, ConnectionId=self.connection_id) + return data diff --git a/chat/template.yaml b/chat/template.yaml index 303ec226..24c89b7d 100644 --- a/chat/template.yaml +++ b/chat/template.yaml @@ -192,7 +192,7 @@ Resources: - x86_64 Layers: - !Ref ChatDependencies - MemorySize: 128 + MemorySize: 1024 Handler: handlers/chat.handler Timeout: 300 Environment: diff --git a/chat/test/fixtures/apitoken.py b/chat/test/fixtures/apitoken.py index 0f61693c..08691856 100644 --- a/chat/test/fixtures/apitoken.py +++ b/chat/test/fixtures/apitoken.py @@ -1,5 +1,8 @@ TEST_SECRET = "TEST_SECRET" TEST_TOKEN_NAME = "dcTestToken" +SUPER_TOKEN = ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ4NDM1NzU2ODg5MTIs' + 'ImlhdCI6MTY4Nzg4MDI0NywiaXNMb2dnZWRJbiI6dHJ1ZSwic3ViIjoiYXBpVGVzdF' + 'N1cGVyVXNlciIsImlzU3VwZXJVc2VyIjp0cnVlfQ.uGEdWlhwUr8RHrC6CLCV5_pOrQDTw41kM6_X99AEg1Q') TEST_TOKEN = ('eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJleHAiOjQ4NDM1ODY2MDYxNjUs' 'ImlhdCI6MTY4Nzg5MTM2OSwiZW50aXRsZW1lbnRzIjpbXSwiaXNMb2dnZWRJbiI6d' 'HJ1ZSwic3ViIjoidGVzdFVzZXIifQ.vIZag1pHE1YyrxsKKlakXX_44ckAvkg7xWOoA_w4x58') diff --git a/chat/test/handlers/test_chat.py b/chat/test/handlers/test_chat.py new file mode 100644 index 00000000..21c9b643 --- /dev/null +++ b/chat/test/handlers/test_chat.py @@ -0,0 +1,74 @@ +# ruff: noqa: E402 + +import json +import os +import sys + +sys.path.append('./src') + +from unittest import mock, TestCase +from unittest.mock import patch +from handlers.chat import handler +from helpers.apitoken import ApiToken +from websocket import Websocket +from event_config import EventConfig + +class MockClient: + def __init__(self): + self.received_data = None + + def post_to_connection(self, Data, ConnectionId): + self.received_data = Data + return Data + + +@mock.patch.dict( + os.environ, + { + "AZURE_OPENAI_RESOURCE_NAME": "test", + }, +) +class TestHandler(TestCase): + def test_handler_unauthorized(self): + event = {"socket": Websocket(client=MockClient(), endpoint_url="test", connection_id="test", ref="test")} + self.assertEqual(handler(event, {}), {'body': 'Unauthorized', 'statusCode': 401}) + + @patch.object(ApiToken, 'is_logged_in') + def test_handler_success(self, mock_is_logged_in): + mock_is_logged_in.return_value = True + event = {"socket": Websocket(client=MockClient(), endpoint_url="test", connection_id="test", ref="test")} + self.assertEqual(handler(event, {}), {'statusCode': 200}) + + @patch.object(ApiToken, 'is_logged_in') + @patch.object(ApiToken, 'is_superuser') + @patch.object(EventConfig, '_is_debug_mode_enabled') + def test_handler_debug_mode(self, mock_is_debug_enabled, mock_is_logged_in, mock_is_superuser): + mock_is_debug_enabled.return_value = True + mock_is_logged_in.return_value = True + mock_is_superuser.return_value = True + mock_client = MockClient() + mock_websocket = Websocket(client=mock_client, endpoint_url="test", connection_id="test", ref="test") + event = {"socket": mock_websocket, "debug": True} + handler(event, {}) + response = json.loads(mock_client.received_data) + self.assertEqual(response["type"], "debug") + + @patch.object(ApiToken, 'is_logged_in') + @patch.object(ApiToken, 'is_superuser') + @patch.object(EventConfig, '_is_debug_mode_enabled') + def test_handler_debug_mode_for_superusers_only(self, mock_is_debug_enabled, mock_is_logged_in, mock_is_superuser): + mock_is_debug_enabled.return_value = True + mock_is_logged_in.return_value = True + mock_is_superuser.return_value = False + mock_client = MockClient() + mock_websocket = Websocket(client=mock_client, endpoint_url="test", connection_id="test", ref="test") + event = {"socket": mock_websocket, "debug": True} + handler(event, {}) + response = json.loads(mock_client.received_data) + self.assertEqual(response["type"], "error") + + @patch.object(EventConfig, 'setup_websocket') + def test_error_handling(self, mock_event): + mock_event.side_effect = Exception("Some error occurred") + with self.assertRaises(Exception): + handler({}, {}) \ No newline at end of file diff --git a/chat/test/handlers/test_streaming_socket_callback_handler.py b/chat/test/handlers/test_streaming_socket_callback_handler.py new file mode 100644 index 00000000..5293a6a2 --- /dev/null +++ b/chat/test/handlers/test_streaming_socket_callback_handler.py @@ -0,0 +1,26 @@ +# ruff: noqa: E402 +import sys +sys.path.append('./src') + +from unittest import TestCase +from handlers.streaming_socket_callback_handler import ( + StreamingSocketCallbackHandler, +) +from websocket import Websocket + + + +class MockClient: + def post_to_connection(self, Data, ConnectionId): + return Data + +class TestMyStreamingSocketCallbackHandler(TestCase): + def test_on_new_llm_token(self): + handler = StreamingSocketCallbackHandler(Websocket(client=MockClient()), False) + result = handler.on_llm_new_token(token="test") + self.assertEqual(result, {'token': 'test', 'ref': {}}) + self.assertFalse(handler.debug_mode) + + def test_debug_mode(self): + handler = StreamingSocketCallbackHandler(Websocket(client=MockClient()), debug_mode=True) + self.assertTrue(handler.debug_mode) diff --git a/chat/test/helpers/test_apitoken.py b/chat/test/helpers/test_apitoken.py index ce6b6ae0..a330f56a 100644 --- a/chat/test/helpers/test_apitoken.py +++ b/chat/test/helpers/test_apitoken.py @@ -1,24 +1,44 @@ +# ruff: noqa: E402 import os -from src.helpers.apitoken import ApiToken -from test.fixtures.apitoken import TEST_SECRET, TEST_TOKEN +import sys +sys.path.append('./src') + +from helpers.apitoken import ApiToken +from test.fixtures.apitoken import SUPER_TOKEN, TEST_SECRET, TEST_TOKEN from unittest import mock, TestCase -@mock.patch.dict( - os.environ, - { - "API_TOKEN_SECRET": TEST_SECRET - } -) + + + +@mock.patch.dict(os.environ, {"API_TOKEN_SECRET": TEST_SECRET}) class TestFunction(TestCase): - def test_empty_token(self): - subject = ApiToken() - self.assertFalse(subject.is_logged_in()) - - def test_valid_token(self): - subject = ApiToken(TEST_TOKEN) - self.assertTrue(subject.is_logged_in()) - - def test_invalid_token(self): - subject = ApiToken("INVALID_TOKEN") - self.assertFalse(subject.is_logged_in()) - \ No newline at end of file + def test_empty_token(self): + subject = ApiToken() + self.assertIsInstance(subject, ApiToken) + self.assertFalse(subject.is_logged_in()) + + def test_valid_token(self): + subject = ApiToken(TEST_TOKEN) + self.assertIsInstance(subject, ApiToken) + self.assertTrue(subject.is_logged_in()) + self.assertFalse(subject.is_superuser()) + + def test_superuser_token(self): + subject = ApiToken(SUPER_TOKEN) + self.assertIsInstance(subject, ApiToken) + self.assertTrue(subject.is_logged_in()) + self.assertTrue(subject.is_superuser()) + + def test_invalid_token(self): + subject = ApiToken("INVALID_TOKEN") + self.assertIsInstance(subject, ApiToken) + self.assertFalse(subject.is_logged_in()) + + def test_empty_token_class_method(self): + empty_token = ApiToken.empty_token() + self.assertIsInstance(empty_token, dict) + self.assertFalse(empty_token["isLoggedIn"]) + + def test_str_method(self): + subject = ApiToken(TEST_TOKEN) + self.assertEqual(str(subject), f"ApiToken(token={subject.token})") diff --git a/chat/test/helpers/test_metrics.py b/chat/test/helpers/test_metrics.py new file mode 100644 index 00000000..651043eb --- /dev/null +++ b/chat/test/helpers/test_metrics.py @@ -0,0 +1,64 @@ +# ruff: noqa: E402 +import json +import os +import sys +sys.path.append('./src') + +from unittest import TestCase, mock +from helpers.metrics import count_tokens, token_usage +from event_config import EventConfig + + + +@mock.patch.dict( + os.environ, + { + "AZURE_OPENAI_RESOURCE_NAME": "test", + "WEAVIATE_URL": "http://test", + "WEAVIATE_API_KEY": "test" + }, +) +class TestMetrics(TestCase): + def test_token_usage(self): + original_question = { + "question": "What is your name?", + "source_documents": [], + } + event = { + "body": json.dumps({ + "deployment_name": "test", + "index": "test", + "k": 1, + "openai_api_version": "2019-05-06", + "prompt": "This is a test prompt.", + "question": original_question, + "ref": "test", + "temperature": 0.5, + "text_key": "text", + "auth": "test123" + }) + } + config = EventConfig(event=event) + + response = { + "output_text": "This is a test response.", + } + + result = token_usage(config, response, original_question) + + expected_result = { + "answer": 6, + "prompt": 36, + "question": 15, + "source_documents": 1, + } + + self.assertEqual(result, expected_result) + + def test_count_tokens(self): + val = "Hello, world!" + expected_result = 4 + + result = count_tokens(val) + + self.assertEqual(result, expected_result) diff --git a/chat/test/helpers/test_prompts.py b/chat/test/helpers/test_prompts.py new file mode 100644 index 00000000..9508f32a --- /dev/null +++ b/chat/test/helpers/test_prompts.py @@ -0,0 +1,33 @@ +# ruff: noqa: E402 +import sys +sys.path.append('./src') + +from helpers.prompts import prompt_template, document_template +from unittest import TestCase + + +class TestPromptTemplate(TestCase): + def test_prompt_template(self): + prompt = prompt_template() + assert isinstance(prompt, str) + assert len(prompt) > 0 + + +class TestDocumentTemplate(TestCase): + def test_empty_attributes(self): + self.assertEqual( + document_template(), + "Content: {page_content}\nMetadata:\nSource: {source}", + ) + + def test_single_attribute(self): + self.assertEqual( + document_template(["title"]), + "Content: {page_content}\nMetadata:\n title: {title}\nSource: {source}", + ) + + def test_multiple_attributes(self): + self.assertEqual( + document_template(["title", "author", "subject", "description"]), + "Content: {page_content}\nMetadata:\n title: {title}\n author: {author}\n subject: {subject}\n description: {description}\nSource: {source}", + ) diff --git a/chat/test/test_event_config.py b/chat/test/test_event_config.py new file mode 100644 index 00000000..8d8c02c1 --- /dev/null +++ b/chat/test/test_event_config.py @@ -0,0 +1,112 @@ +# ruff: noqa: E402 +import json +import os +import sys +sys.path.append('./src') + +from event_config import EventConfig +from unittest import TestCase, mock + + +class TestEventConfigWithoutAzureResource(TestCase): + def test_requires_an_azure_resource(self): + with self.assertRaises(EnvironmentError): + EventConfig() + + +@mock.patch.dict( + os.environ, + { + "AZURE_OPENAI_RESOURCE_NAME": "test", + }, +) +class TestEventConfig(TestCase): + def test_fetches_attributes_from_vector_database(self): + os.environ.pop("AZURE_OPENAI_RESOURCE_NAME", None) + with self.assertRaises(EnvironmentError): + EventConfig() + + def test_defaults(self): + actual = EventConfig(event={"body": json.dumps({"attributes": ["title"]})}) + expected_defaults = {"azure_endpoint": "https://test.openai.azure.com/"} + self.assertEqual(actual.azure_endpoint, expected_defaults["azure_endpoint"]) + + def test_attempt_override_without_superuser_status(self): + actual = EventConfig( + event={ + "body": json.dumps( + { + "azure_resource_name": "new_name_for_test", + "attributes": ["title", "subject", "date_created"], + "index": "testIndex", + "k": 100, + "openai_api_version": "2024-01-01", + "question": "test question", + "ref": "test ref", + "temperature": 0.9, + "text_key": "accession_number", + } + ) + } + ) + expected_output = { + "attributes": [], + "azure_endpoint": "https://test.openai.azure.com/", + "index_name": "DCWork", + "k": 10, + "openai_api_version": "2023-07-01-preview", + "question": "test question", + "ref": "test ref", + "temperature": 0.2, + "text_key": "title", + } + self.assertEqual(actual.azure_endpoint, expected_output["azure_endpoint"]) + self.assertEqual(actual.index_name, expected_output["index_name"]) + self.assertEqual(actual.attributes, expected_output["attributes"]) + self.assertEqual(actual.k, expected_output["k"]) + self.assertEqual( + actual.openai_api_version, expected_output["openai_api_version"] + ) + self.assertEqual(actual.question, expected_output["question"]) + self.assertEqual(actual.ref, expected_output["ref"]) + self.assertEqual(actual.temperature, expected_output["temperature"]) + self.assertEqual(actual.text_key, expected_output["text_key"]) + + def test_text_key_removed_from_attributes_list(self): + actual = EventConfig( + event={ + "body": json.dumps( + { + "attributes": ["title", "description"], + "text_key": "description", + } + ) + } + ) + self.assertNotIn(actual.text_key, actual.attributes) + + def test_source_removed_from_attributes_list(self): + actual = EventConfig(event={"body": json.dumps({"attributes": ["source"]})}) + self.assertNotIn("source", actual.attributes) + + def test_debug_message(self): + self.assertEqual( + EventConfig( + event={"body": json.dumps({"attributes": ["source"]})} + ).debug_message()["type"], + "debug", + ) + + def test_to_bool(self): + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool(""), False) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool("0"), False) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool("no"), False) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool("false"), False) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool("False"), False) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool("FALSE"), False) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool("no"), False) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool("No"), False) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool("NO"), False) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool("true"), True) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool(True), True) + self.assertEqual(EventConfig(event={"body": json.dumps({"attributes": ["source"]})})._to_bool(False), False) diff --git a/chat/test/test_websocket.py b/chat/test/test_websocket.py new file mode 100644 index 00000000..4d4d8b76 --- /dev/null +++ b/chat/test/test_websocket.py @@ -0,0 +1,18 @@ +# ruff: noqa: E402 +import sys +sys.path.append('./src') + +from unittest import TestCase +from websocket import Websocket + + +class MockClient: + def post_to_connection(self, Data, ConnectionId): + return Data + +class TestWebsocket(TestCase): + def test_post_to_connection(self): + websocket = Websocket(client=MockClient(), connection_id="test_connection_id", ref="test_ref") + message = "test_message" + expected = {"message": "test_message", "ref": "test_ref"} + self.assertEqual(websocket.send(message), expected) \ No newline at end of file diff --git a/template.yaml b/template.yaml index 2c1ab611..3256c546 100644 --- a/template.yaml +++ b/template.yaml @@ -669,6 +669,8 @@ Resources: Properties: Handler: handlers/get-chat-endpoint.handler Description: Returns the URI of the chat websocket API. + Layers: + - !Ref apiDependencies Environment: Variables: WEBSOCKET_URI: !GetAtt chatWebsocket.Outputs.WebSocketURI From 793b9accae136412f46dcee467236c3bb366edcf Mon Sep 17 00:00:00 2001 From: Karen Shaw Date: Fri, 23 Feb 2024 22:19:26 +0000 Subject: [PATCH 6/6] Update chat handler for Opensearch Update permissions on chat websocket function Add AWS4Auth to opensearch client Tweak EventConfig to make chat work with OpenSearch --- .gitignore | 2 + Makefile | 2 + chat/dependencies/requirements.txt | 7 ++- chat/src/content_handler.py | 36 ++++++++++++ chat/src/event_config.py | 90 ++++++++++++++---------------- chat/src/handlers/chat.py | 13 +++-- chat/src/helpers/prompts.py | 4 +- chat/src/helpers/response.py | 23 ++++++-- chat/src/requirements.txt | 7 ++- chat/src/setup.py | 75 +++++++++++++------------ chat/src/websocket.py | 6 +- chat/template.yaml | 24 ++++++-- chat/test/handlers/test_chat.py | 12 +++- chat/test/helpers/test_prompts.py | 6 +- chat/test/test_event_config.py | 23 +------- template.yaml | 13 ++--- 16 files changed, 199 insertions(+), 144 deletions(-) create mode 100644 chat/src/content_handler.py diff --git a/.gitignore b/.gitignore index 3c9b74f5..da679d9f 100644 --- a/.gitignore +++ b/.gitignore @@ -221,6 +221,8 @@ $RECYCLE.BIN/ /docs/docs/spec/openapi.json /docs/site +.venv + .vscode /samconfig.toml /samconfig.yaml diff --git a/Makefile b/Makefile index dded3f4f..6c3a5a06 100644 --- a/Makefile +++ b/Makefile @@ -40,6 +40,8 @@ cover-html-python: deps-python cd chat && export SKIP_WEAVIATE_SETUP=True && coverage run --source=src -m unittest -v && coverage html --skip-empty style-python: deps-python cd chat && ruff check . +style-python-fix: deps-python + cd chat && ruff check --fix . test-python: deps-python cd chat && export SKIP_WEAVIATE_SETUP=True && PYTHONPATH=src:test && python -m unittest discover -v python-version: diff --git a/chat/dependencies/requirements.txt b/chat/dependencies/requirements.txt index 6bee442a..f80af593 100644 --- a/chat/dependencies/requirements.txt +++ b/chat/dependencies/requirements.txt @@ -1,8 +1,11 @@ boto3~=1.34.13 -langchain~=0.0.208 +langchain~=0.1.8 +langchain-community openai~=0.27.8 +opensearch-py pyjwt~=2.6.0 python-dotenv~=1.0.0 +requests +requests-aws4auth tiktoken~=0.4.0 -weaviate-client~=3.19.2 wheel~=0.40.0 \ No newline at end of file diff --git a/chat/src/content_handler.py b/chat/src/content_handler.py new file mode 100644 index 00000000..b75f98b9 --- /dev/null +++ b/chat/src/content_handler.py @@ -0,0 +1,36 @@ +import json +from typing import Dict, List +from langchain_community.embeddings.sagemaker_endpoint import EmbeddingsContentHandler + +class ContentHandler(EmbeddingsContentHandler): + content_type = "application/json" + accepts = "application/json" + + def transform_input(self, inputs: list[str], model_kwargs: Dict) -> bytes: + """ + Transforms the input into bytes that can be consumed by SageMaker endpoint. + Args: + inputs: List of input strings. + model_kwargs: Additional keyword arguments to be passed to the endpoint. + Returns: + The transformed bytes input. + """ + # Example: inference.py expects a JSON string with a "inputs" key: + input_str = json.dumps({"inputs": inputs, **model_kwargs}) + return input_str.encode("utf-8") + + def transform_output(self, output: bytes) -> List[List[float]]: + """ + Transforms the bytes output from the endpoint into a list of embeddings. + Args: + output: The bytes output from SageMaker endpoint. + Returns: + The transformed output - list of embeddings + Note: + The length of the outer list is the number of input strings. + The length of the inner lists is the embedding dimension. + """ + # Example: inference.py returns a JSON string with the list of + # embeddings in a "vectors" key: + response_json = json.loads(output.read().decode("utf-8")) + return [response_json["embedding"]] \ No newline at end of file diff --git a/chat/src/event_config.py b/chat/src/event_config.py index 5c7762b3..9c9facbc 100644 --- a/chat/src/event_config.py +++ b/chat/src/event_config.py @@ -5,8 +5,8 @@ from langchain.chains.qa_with_sources import load_qa_with_sources_chain from langchain.prompts import PromptTemplate from setup import ( - weaviate_client, - weaviate_vector_store, + opensearch_client, + opensearch_vector_store, openai_chat_client, ) from typing import List @@ -15,17 +15,14 @@ from helpers.prompts import document_template, prompt_template from websocket import Websocket - CHAIN_TYPE = "stuff" DOCUMENT_VARIABLE_NAME = "context" -INDEX_NAME = "DCWork" -K_VALUE = 10 +K_VALUE = 5 MAX_K = 100 TEMPERATURE = 0.2 TEXT_KEY = "title" VERSION = "2023-07-01-preview" - @dataclass class EventConfig: """ @@ -33,6 +30,12 @@ class EventConfig: Default values are set for the following properties which can be overridden in the payload message. """ + DEFAULT_ATTRIBUTES = ["accession_number", "alternate_title", "api_link", "canonical_link", "caption", "collection", + "contributor", "date_created", "date_created_edtf", "description", "genre", "id", "identifier", + "keywords", "language", "notes", "physical_description_material", "physical_description_size", + "provenance", "publisher", "rights_statement", "subject", "table_of_contents", "thumbnail", + "title", "visibility", "work_type"] + api_token: ApiToken = field(init=False) attributes: List[str] = field(init=False) azure_endpoint: str = field(init=False) @@ -41,7 +44,6 @@ class EventConfig: deployment_name: str = field(init=False) document_prompt: PromptTemplate = field(init=False) event: dict = field(default_factory=dict) - index_name: str = field(init=False) is_logged_in: bool = field(init=False) k: int = field(init=False) openai_api_version: str = field(init=False) @@ -54,7 +56,7 @@ class EventConfig: temperature: float = field(init=False) socket: Websocket = field(init=False, default=None) text_key: str = field(init=False) - + def __post_init__(self): self.payload = json.loads(self.event.get("body", "{}")) self.api_token = ApiToken(signed_token=self.payload.get("auth")) @@ -64,7 +66,6 @@ def __post_init__(self): self.azure_endpoint = self._get_azure_endpoint() self.debug_mode = self._is_debug_mode_enabled() self.deployment_name = self._get_deployment_name() - self.index_name = self._get_index_name() self.is_logged_in = self.api_token.is_logged_in() self.k = self._get_k() self.openai_api_version = self._get_openai_api_version() @@ -74,9 +75,10 @@ def __post_init__(self): self.ref = self.payload.get("ref") self.temperature = self._get_temperature() self.text_key = self._get_text_key() - self.attributes = self._get_attributes() self.document_prompt = self._get_document_prompt() - self.prompt = PromptTemplate(template=self.prompt_text, input_variables=["question", "context"]) + self.prompt = PromptTemplate( + template=self.prompt_text, input_variables=["question", "context"] + ) def _get_payload_value_with_superuser_check(self, key, default): if self.api_token.is_superuser(): @@ -84,65 +86,58 @@ def _get_payload_value_with_superuser_check(self, key, default): else: return default + def _get_attributes_function(self): + try: + opensearch = opensearch_client() + mapping = opensearch.indices.get_mapping(index="dc-v2-work") + return list(next(iter(mapping.values()))['mappings']['properties'].keys()) + except StopIteration: + return [] + + def _get_attributes(self): + return self._get_payload_value_with_superuser_check("attributes", self.DEFAULT_ATTRIBUTES) + def _get_azure_endpoint(self): default = f"https://{self._get_azure_resource_name()}.openai.azure.com/" return self._get_payload_value_with_superuser_check("azure_endpoint", default) def _get_azure_resource_name(self): - azure_resource_name = self._get_payload_value_with_superuser_check("azure_resource_name", os.environ.get("AZURE_OPENAI_RESOURCE_NAME")) + azure_resource_name = self._get_payload_value_with_superuser_check( + "azure_resource_name", os.environ.get("AZURE_OPENAI_RESOURCE_NAME") + ) if not azure_resource_name: raise EnvironmentError( "Either payload must contain 'azure_resource_name' or environment variable 'AZURE_OPENAI_RESOURCE_NAME' must be set" ) return azure_resource_name - + def _get_deployment_name(self): - return self._get_payload_value_with_superuser_check("deployment_name", os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_ID")) - - def _get_index_name(self): - return self._get_payload_value_with_superuser_check("index", INDEX_NAME) + return self._get_payload_value_with_superuser_check( + "deployment_name", os.getenv("AZURE_OPENAI_LLM_DEPLOYMENT_ID") + ) def _get_k(self): value = self._get_payload_value_with_superuser_check("k", K_VALUE) return min(value, MAX_K) def _get_openai_api_version(self): - return self._get_payload_value_with_superuser_check("openai_api_version", VERSION) - + return self._get_payload_value_with_superuser_check( + "openai_api_version", VERSION + ) + def _get_prompt_text(self): return self._get_payload_value_with_superuser_check("prompt", prompt_template()) - + def _get_temperature(self): return self._get_payload_value_with_superuser_check("temperature", TEMPERATURE) def _get_text_key(self): return self._get_payload_value_with_superuser_check("text_key", TEXT_KEY) - def _get_attributes(self): - attributes = [ - item - for item in self._get_request_attributes() - if item not in [self._get_text_key(), "source", "full_text"] - ] - return attributes - - def _get_request_attributes(self): - if os.getenv("SKIP_WEAVIATE_SETUP"): - return [] - - attributes = self._get_payload_value_with_superuser_check("attributes", []) - if attributes: - return attributes - else: - client = weaviate_client() - schema = client.schema.get(self._get_index_name()) - names = [prop["name"] for prop in schema.get("properties")] - return names - def _get_document_prompt(self): return PromptTemplate( template=document_template(self.attributes), - input_variables=["page_content", "source"] + self.attributes, + input_variables=["title", "id"] + self.attributes, ) def debug_message(self): @@ -152,7 +147,6 @@ def debug_message(self): "attributes": self.attributes, "azure_endpoint": self.azure_endpoint, "deployment_name": self.deployment_name, - "index": self.index_name, "k": self.k, "openai_api_version": self.openai_api_version, "prompt": self.prompt_text, @@ -167,7 +161,9 @@ def setup_websocket(self, socket=None): if socket is None: connection_id = self.request_context.get("connectionId") endpoint_url = f'https://{self.request_context.get("domainName")}/{self.request_context.get("stage")}' - self.socket = Websocket(endpoint_url=endpoint_url, connection_id=connection_id, ref=self.ref) + self.socket = Websocket( + endpoint_url=endpoint_url, connection_id=connection_id, ref=self.ref + ) else: self.socket = socket return self.socket @@ -178,11 +174,7 @@ def setup_llm_request(self): self._setup_chain() def _setup_vector_store(self): - self.weaviate = weaviate_vector_store( - index_name=self.index_name, - text_key=self.text_key, - attributes=self.attributes + ["source"], - ) + self.opensearch = opensearch_vector_store() def _setup_chat_client(self): self.client = openai_chat_client( diff --git a/chat/src/handlers/chat.py b/chat/src/handlers/chat.py index aa19ff79..8757b286 100644 --- a/chat/src/handlers/chat.py +++ b/chat/src/handlers/chat.py @@ -1,4 +1,6 @@ import os +import sys +import traceback from event_config import EventConfig from helpers.response import prepare_response @@ -21,9 +23,8 @@ def handler(event, _context): config.socket.send(final_response) return {"statusCode": 200} - except Exception as err: - if err.__class__.__name__ == "PayloadTooLargeException": - config.socket.send({"type": "error", "message": "Payload too large"}) - return {"statusCode": 413, "body": "Payload too large"} - else: - raise err + except Exception: + exc_info = sys.exc_info() + err_text = ''.join(traceback.format_exception(*exc_info)) + print(err_text) + return {"statusCode": 500, "body": f'Unhandled error:\n{err_text}'} diff --git a/chat/src/helpers/prompts.py b/chat/src/helpers/prompts.py index 32ffbc46..397b7005 100644 --- a/chat/src/helpers/prompts.py +++ b/chat/src/helpers/prompts.py @@ -16,8 +16,8 @@ def document_template(attributes: Optional[List[str]] = None) -> str: if attributes is None: attributes = [] lines = ( - ["Content: {page_content}", "Metadata:"] + ["Content: {title}", "Metadata:"] + [f" {attribute}: {{{attribute}}}" for attribute in attributes] - + ["Source: {source}"] + + ["Source: {id}"] ) return "\n".join(lines) diff --git a/chat/src/helpers/response.py b/chat/src/helpers/response.py index 42b4e4ed..a3b946d4 100644 --- a/chat/src/helpers/response.py +++ b/chat/src/helpers/response.py @@ -1,7 +1,6 @@ from helpers.metrics import token_usage from openai.error import InvalidRequestError - def base_response(config, response): return {"answer": response["output_text"], "ref": config.ref} @@ -12,7 +11,6 @@ def debug_response(config, response, original_question): "attributes": config.attributes, "azure_endpoint": config.azure_endpoint, "deployment_name": config.deployment_name, - "index": config.index_name, "is_superuser": config.api_token.is_superuser(), "k": config.k, "openai_api_version": config.openai_api_version, @@ -26,7 +24,13 @@ def debug_response(config, response, original_question): def get_and_send_original_question(config, docs): - doc_response = [doc.__dict__ for doc in docs] + doc_response = [] + for doc in docs: + doc_dict = doc.__dict__ + metadata = doc_dict.get('metadata', {}) + new_doc = {key: extract_prompt_value(metadata.get(key)) for key in config.attributes if key in metadata} + doc_response.append(new_doc) + original_question = { "question": config.question, "source_documents": doc_response, @@ -34,11 +38,18 @@ def get_and_send_original_question(config, docs): config.socket.send(original_question) return original_question - +def extract_prompt_value(v): + if isinstance(v, list): + return [extract_prompt_value(item) for item in v] + elif isinstance(v, dict) and 'label' in v: + return [v.get('label')] + else: + return v + def prepare_response(config): try: - docs = config.weaviate.similarity_search( - config.question, k=config.k, additional="certainty" + docs = config.opensearch.similarity_search( + config.question, k=config.k, vector_field="embedding", text_field="id" ) original_question = get_and_send_original_question(config, docs) response = config.chain({"question": config.question, "input_documents": docs}) diff --git a/chat/src/requirements.txt b/chat/src/requirements.txt index 8cb0270e..04100144 100644 --- a/chat/src/requirements.txt +++ b/chat/src/requirements.txt @@ -1,11 +1,14 @@ # Runtime Dependencies boto3~=1.34.13 -langchain~=0.0.208 +langchain~=0.1.8 +langchain-community openai~=0.27.8 +opensearch-py pyjwt~=2.6.0 python-dotenv~=1.0.0 +requests +requests-aws4auth tiktoken~=0.4.0 -weaviate-client~=3.19.2 wheel~=0.40.0 # Dev/Test Dependencies diff --git a/chat/src/setup.py b/chat/src/setup.py index cc70c653..39a99338 100644 --- a/chat/src/setup.py +++ b/chat/src/setup.py @@ -1,10 +1,16 @@ -from langchain.chat_models import AzureChatOpenAI -from langchain.vectorstores import Weaviate -from typing import List +from content_handler import ContentHandler +from langchain_community.chat_models import AzureChatOpenAI +from langchain_community.embeddings import SagemakerEndpointEmbeddings +from langchain_community.vectorstores import OpenSearchVectorSearch +from opensearchpy import OpenSearch, RequestsHttpConnection +from requests_aws4auth import AWS4Auth import os -import weaviate import boto3 +def prefix(value): + env_prefix = os.getenv("ENV_PREFIX") + env_prefix = None if env_prefix == "" else env_prefix + return '-'.join(filter(None, [env_prefix, value])) def openai_chat_client(**kwargs): return AzureChatOpenAI( @@ -12,42 +18,39 @@ def openai_chat_client(**kwargs): **kwargs, ) +def opensearch_client(region_name=os.getenv("AWS_REGION")): + print(region_name) + session = boto3.Session(region_name=region_name) + awsauth = AWS4Auth(region=region_name, service="es", refreshable_credentials=session.get_credentials()) + endpoint = os.getenv("ELASTICSEARCH_ENDPOINT") + + return OpenSearch( + hosts=[{'host': endpoint, 'port': 443}], + use_ssl = True, + connection_class=RequestsHttpConnection, + http_auth=awsauth, + ) -def weaviate_client(): - if os.getenv("SKIP_WEAVIATE_SETUP"): - return None - - weaviate_url = os.environ.get("WEAVIATE_URL") - try: - if weaviate_url is None: - raise EnvironmentError( - "WEAVIATE_URL is not set in the environment variables" - ) - - weaviate_api_key = os.environ.get("WEAVIATE_API_KEY") - if weaviate_api_key is None: - raise EnvironmentError( - "WEAVIATE_API_KEY is not set in the environment variables" - ) - - auth_config = weaviate.AuthApiKey(api_key=weaviate_api_key) - - client = weaviate.Client(url=weaviate_url, auth_client_secret=auth_config) - except Exception as e: - print(f"An error occurred: {e}") - client = None - return client - +def opensearch_vector_store(region_name=os.getenv("AWS_REGION")): + session = boto3.Session(region_name=region_name) + awsauth = AWS4Auth(region=region_name, service="es", refreshable_credentials=session.get_credentials()) -def weaviate_vector_store(index_name: str, text_key: str, attributes: List[str] = []): - if os.getenv("SKIP_WEAVIATE_SETUP"): - return None - - client = weaviate_client() + sagemaker_client = session.client(service_name="sagemaker-runtime", region_name=session.region_name) + embeddings = SagemakerEndpointEmbeddings( + client=sagemaker_client, + region_name=session.region_name, + endpoint_name=os.getenv("EMBEDDING_ENDPOINT"), + content_handler=ContentHandler() + ) - return Weaviate( - client=client, index_name=index_name, text_key=text_key, attributes=attributes + docsearch = OpenSearchVectorSearch( + index_name=prefix("dc-v2-work"), + embedding_function=embeddings, + opensearch_url="https://" + os.getenv("ELASTICSEARCH_ENDPOINT"), + connection_class=RequestsHttpConnection, + http_auth=awsauth, ) + return docsearch def websocket_client(endpoint_url: str): diff --git a/chat/src/websocket.py b/chat/src/websocket.py index dc81179a..ea682b0a 100644 --- a/chat/src/websocket.py +++ b/chat/src/websocket.py @@ -12,5 +12,9 @@ def send(self, data): data = {"message": data} data["ref"] = self.ref data_as_bytes = bytes(json.dumps(data), "utf-8") - self.client.post_to_connection(Data=data_as_bytes, ConnectionId=self.connection_id) + + if self.connection_id == "debug": + print(data) + else: + self.client.post_to_connection(Data=data_as_bytes, ConnectionId=self.connection_id) return data diff --git a/chat/template.yaml b/chat/template.yaml index 24c89b7d..d7696246 100644 --- a/chat/template.yaml +++ b/chat/template.yaml @@ -17,12 +17,12 @@ Parameters: AzureOpenaiResourceName: Type: String Description: Azure OpenAI Resource Name - WeaviateApiKey: + ElasticsearchEndpoint: Type: String - Description: Weaviate API Key - WeaviateUrl: + Description: Elasticsearch URL + EmbeddingEndpoint: Type: String - Description: Weaviate URL + Description: Sagemaker Inference Endpoint Resources: ApiGwAccountConfig: Type: "AWS::ApiGateway::Account" @@ -202,8 +202,8 @@ Resources: AZURE_OPENAI_EMBEDDING_DEPLOYMENT_ID: !Ref AzureOpenaiEmbeddingDeploymentId AZURE_OPENAI_LLM_DEPLOYMENT_ID: !Ref AzureOpenaiLlmDeploymentId AZURE_OPENAI_RESOURCE_NAME: !Ref AzureOpenaiResourceName - WEAVIATE_API_KEY: !Ref WeaviateApiKey - WEAVIATE_URL: !Ref WeaviateUrl + ELASTICSEARCH_ENDPOINT: !Ref ElasticsearchEndpoint + EMBEDDING_ENDPOINT: !Ref EmbeddingEndpoint Policies: - Statement: - Effect: Allow @@ -211,6 +211,18 @@ Resources: - 'execute-api:ManageConnections' Resource: - !Sub 'arn:aws:execute-api:${AWS::Region}:${AWS::AccountId}:${ChatWebSocket}/*' + - Statement: + - Effect: Allow + Action: + - 'es:ESHttpGet' + - 'es:ESHttpPost' + Resource: '*' + - Statement: + - Effect: Allow + Action: + - 'sagemaker:InvokeEndpoint' + - 'sagemaker:InvokeEndpointAsync' + Resource: !Sub 'arn:aws:sagemaker:${AWS::Region}:${AWS::AccountId}:endpoint/${EmbeddingEndpoint}' Metadata: BuildMethod: nodejs18.x Deployment: diff --git a/chat/test/handlers/test_chat.py b/chat/test/handlers/test_chat.py index 21c9b643..ebce7e51 100644 --- a/chat/test/handlers/test_chat.py +++ b/chat/test/handlers/test_chat.py @@ -1,5 +1,7 @@ # ruff: noqa: E402 +import contextlib +from io import StringIO import json import os import sys @@ -70,5 +72,11 @@ def test_handler_debug_mode_for_superusers_only(self, mock_is_debug_enabled, moc @patch.object(EventConfig, 'setup_websocket') def test_error_handling(self, mock_event): mock_event.side_effect = Exception("Some error occurred") - with self.assertRaises(Exception): - handler({}, {}) \ No newline at end of file + capture = StringIO() + with contextlib.redirect_stdout(capture): + response = handler({}, {}) + self.assertEqual(response['statusCode'], 500) + self.assertIn('Unhandled error:', response['body']) + self.assertIn('Exception: Some error occurred', response['body']) + self.assertIn('Exception: Some error occurred', capture.getvalue()) + diff --git a/chat/test/helpers/test_prompts.py b/chat/test/helpers/test_prompts.py index 9508f32a..b9a7d950 100644 --- a/chat/test/helpers/test_prompts.py +++ b/chat/test/helpers/test_prompts.py @@ -17,17 +17,17 @@ class TestDocumentTemplate(TestCase): def test_empty_attributes(self): self.assertEqual( document_template(), - "Content: {page_content}\nMetadata:\nSource: {source}", + "Content: {title}\nMetadata:\nSource: {id}", ) def test_single_attribute(self): self.assertEqual( document_template(["title"]), - "Content: {page_content}\nMetadata:\n title: {title}\nSource: {source}", + "Content: {title}\nMetadata:\n title: {title}\nSource: {id}", ) def test_multiple_attributes(self): self.assertEqual( document_template(["title", "author", "subject", "description"]), - "Content: {page_content}\nMetadata:\n title: {title}\n author: {author}\n subject: {subject}\n description: {description}\nSource: {source}", + "Content: {title}\nMetadata:\n title: {title}\n author: {author}\n subject: {subject}\n description: {description}\nSource: {id}", ) diff --git a/chat/test/test_event_config.py b/chat/test/test_event_config.py index 8d8c02c1..55f8381d 100644 --- a/chat/test/test_event_config.py +++ b/chat/test/test_event_config.py @@ -50,10 +50,9 @@ def test_attempt_override_without_superuser_status(self): } ) expected_output = { - "attributes": [], + "attributes": EventConfig.DEFAULT_ATTRIBUTES, "azure_endpoint": "https://test.openai.azure.com/", - "index_name": "DCWork", - "k": 10, + "k": 5, "openai_api_version": "2023-07-01-preview", "question": "test question", "ref": "test ref", @@ -61,7 +60,6 @@ def test_attempt_override_without_superuser_status(self): "text_key": "title", } self.assertEqual(actual.azure_endpoint, expected_output["azure_endpoint"]) - self.assertEqual(actual.index_name, expected_output["index_name"]) self.assertEqual(actual.attributes, expected_output["attributes"]) self.assertEqual(actual.k, expected_output["k"]) self.assertEqual( @@ -72,23 +70,6 @@ def test_attempt_override_without_superuser_status(self): self.assertEqual(actual.temperature, expected_output["temperature"]) self.assertEqual(actual.text_key, expected_output["text_key"]) - def test_text_key_removed_from_attributes_list(self): - actual = EventConfig( - event={ - "body": json.dumps( - { - "attributes": ["title", "description"], - "text_key": "description", - } - ) - } - ) - self.assertNotIn(actual.text_key, actual.attributes) - - def test_source_removed_from_attributes_list(self): - actual = EventConfig(event={"body": json.dumps({"attributes": ["source"]})}) - self.assertNotIn("source", actual.attributes) - def test_debug_message(self): self.assertEqual( EventConfig( diff --git a/template.yaml b/template.yaml index 3256c546..5234a30a 100644 --- a/template.yaml +++ b/template.yaml @@ -62,6 +62,9 @@ Parameters: ElasticsearchEndpoint: Type: String Description: Elasticsearch url + EmbeddingEndpoint: + Type: String + Description: Sagemaker Inference Endpoint EnvironmentPrefix: Type: String Description: Index Prefix @@ -112,12 +115,6 @@ Parameters: StreamingBucket: Type: String Description: Meadow streaming bucket - WeaviateApiKey: - Type: String - Description: Weaviate API Key - WeaviateUrl: - Type: String - Description: Weaviate URL Resources: apiDependencies: Type: AWS::Serverless::LayerVersion @@ -662,8 +659,8 @@ Resources: AzureOpenaiEmbeddingDeploymentId: !Ref AzureOpenaiEmbeddingDeploymentId AzureOpenaiLlmDeploymentId: !Ref AzureOpenaiLlmDeploymentId AzureOpenaiResourceName: !Ref AzureOpenaiResourceName - WeaviateApiKey: !Ref WeaviateApiKey - WeaviateUrl: !Ref WeaviateUrl + ElasticsearchEndpoint: !Ref ElasticsearchEndpoint + EmbeddingEndpoint: !Ref EmbeddingEndpoint chatWebsocketEndpoint: Type: AWS::Serverless::Function Properties: