diff --git a/.github/.codecov.yml b/.github/.codecov.yml deleted file mode 100644 index 9d4b3afb6..000000000 --- a/.github/.codecov.yml +++ /dev/null @@ -1,23 +0,0 @@ -coverage: - status: - project: - default: - threshold: 100% - parsers: - gcov: - branch_detection: - conditional: yes - loop: yes - method: no - macro: no - - flags: - python: - paths: - - py/ - javascript: - paths: - - js/ - go: - paths: - - go/ diff --git a/.github/actions/run-sdk-collections-tests/action.yml b/.github/actions/run-sdk-collections-tests/action.yml index 9aa609a27..f73b89d65 100644 --- a/.github/actions/run-sdk-collections-tests/action.yml +++ b/.github/actions/run-sdk-collections-tests/action.yml @@ -97,3 +97,13 @@ runs: working-directory: ./py shell: bash run: poetry run python tests/integration/runner_sdk_basic.py test_delete_chunks + + - name: Get all prompts + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk_basic.py test_get_all_prompts + + - name: Get prompt + working-directory: ./py + shell: bash + run: poetry run python tests/integration/runner_sdk_basic.py test_get_prompt diff --git a/.github/workflows/r2r-full-py-integration-tests-graphrag.yml b/.github/workflows/r2r-full-py-integration-tests-graphrag.yml index e35df72db..f900bd3d7 100644 --- a/.github/workflows/r2r-full-py-integration-tests-graphrag.yml +++ b/.github/workflows/r2r-full-py-integration-tests-graphrag.yml @@ -1,6 +1,14 @@ name: R2R Full Python Integration Test (ubuntu) on: + push: + branches: + - main + pull_request: + branches: + - dev + - dev-minor + - main workflow_dispatch: jobs: @@ -47,3 +55,13 @@ jobs: - name: Run SDK GraphRAG Tests if: matrix.test_category == 'sdk-graphrag' uses: ./.github/actions/run-sdk-graphrag-tests + + - name: Print Docker logs on failure + if: > + (matrix.test_category == 'cli-graphrag' && failure()) || + (matrix.test_category == 'sdk-graphrag' && failure()) + run: | + echo "::group::Docker Logs" + docker ps -a + docker logs r2r-full-r2r-1 + echo "::endgroup::" diff --git a/.github/workflows/r2r-full-py-integration-tests.yml b/.github/workflows/r2r-full-py-integration-tests.yml index f31fde057..b465edafd 100644 --- a/.github/workflows/r2r-full-py-integration-tests.yml +++ b/.github/workflows/r2r-full-py-integration-tests.yml @@ -1,6 +1,14 @@ name: R2R Full Python Integration Test (ubuntu) on: + push: + branches: + - main + pull_request: + branches: + - dev + - dev-minor + - main workflow_dispatch: jobs: @@ -72,3 +80,18 @@ jobs: - name: Run SDK Prompt Tests if: matrix.test_category == 'sdk-prompts' uses: ./.github/actions/run-sdk-prompt-management-tests + + - name: Print Docker logs on failure + if: > + (matrix.test_category == 'cli-ingestion' && failure()) || + (matrix.test_category == 'sdk-ingestion' && failure()) || + (matrix.test_category == 'cli-retrieval' && failure()) || + (matrix.test_category == 'sdk-retrieval' && failure()) || + (matrix.test_category == 'sdk-auth' && failure()) || + (matrix.test_category == 'sdk-collections' && failure()) || + (matrix.test_category == 'sdk-prompts' && failure()) + run: | + echo "::group::Docker Logs" + docker ps -a + docker logs r2r-full-r2r-1 + echo "::endgroup::" diff --git a/.github/workflows/r2r-js-sdk-integration-tests.yml b/.github/workflows/r2r-js-sdk-integration-tests.yml index 2712ab5c1..eed9f948e 100644 --- a/.github/workflows/r2r-js-sdk-integration-tests.yml +++ b/.github/workflows/r2r-js-sdk-integration-tests.yml @@ -1,11 +1,59 @@ name: R2R JS SDK Integration Tests + on: push: branches: - - '**' # Trigger 
on all branches + - '**' + jobs: - test: + setup: runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Set up Python and install dependencies + uses: ./.github/actions/setup-python-light + with: + os: ubuntu-latest + - name: Setup and start PostgreSQL + uses: ./.github/actions/setup-postgres-ext + with: + os: ubuntu-latest + - name: Start R2R Light server + uses: ./.github/actions/start-r2r-light + - name: Use Node.js + uses: actions/setup-node@v2 + with: + node-version: "20.x" + - name: Install pnpm + uses: pnpm/action-setup@v2 + with: + version: 8.x + run_install: false + - name: Install JS SDK dependencies + working-directory: ./js/sdk + run: pnpm install + - name: Check if R2R server is running + run: | + curl http://localhost:7272/v2/health || echo "Server not responding" + + v3-integration-tests: + needs: setup + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + test-group: + - ChunksIntegrationSuperUser.test.ts + - CollectionsIntegrationSuperUser.test.ts + - ConversationsIntegrationSuperUser.test.ts + - DocumentsAndCollectionsIntegrationUser.test.ts + - DocumentsIntegrationSuperUser.test.ts + - GraphsIntegrationSuperUser.test.ts + - PromptsIntegrationSuperUser.test.ts + - RetrievalIntegrationSuperUser.test.ts + - SystemIntegrationSuperUser.test.ts + - SystemIntegrationUser.test.ts + - UsersIntegrationSuperUser.test.ts env: OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} AZURE_API_KEY: ${{ secrets.AZURE_API_KEY }} @@ -42,9 +90,6 @@ jobs: - name: Install JS SDK dependencies working-directory: ./js/sdk run: pnpm install - - name: Check if R2R server is running - run: | - curl http://localhost:7272/v2/health || echo "Server not responding" - - name: Run integration tests + - name: Run remaining tests working-directory: ./js/sdk - run: pnpm test + run: pnpm jest ${{ matrix.test-group }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 73010fcef..4a8dfa20d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,6 +8,15 @@ repos: - id: check-ast - id: check-yaml + - repo: local + hooks: + - id: check-typing-imports + name: Check for Dict/List usage + entry: bash -c 'echo "Checking for typing imports..." && find . -name "*.py" | xargs grep -n "from typing.*import.*[^d]Dict\\|from typing.*import.*List" || exit 0 && echo "⚠️ Please import dict/list instead of Dict/List from typing" && exit 1' + language: system + types: [python] + pass_filenames: false + - repo: local hooks: - id: isort diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index 7c14a5c0c..000000000 --- a/docs/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# Mintlify Starter Kit - -Click on `Use this template` to copy the Mintlify starter kit. The starter kit contains examples including - -- Guide pages -- Navigation -- Customizations -- API Reference pages -- Use of popular components - -### Development - -Install the [Mintlify CLI](https://www.npmjs.com/package/mintlify) to preview the documentation changes locally. To install, use the following command - -``` -npm i -g mintlify -``` - -Run the following command at the root of your documentation (where mint.json is) - -``` -mintlify dev -``` - -### Publishing Changes - -Install our Github App to auto propagate changes from your repo to your deployment. Changes will be deployed to production automatically after pushing to the default branch. Find the link to install on your dashboard. - -#### Troubleshooting - -- Mintlify dev isn't running - Run `mintlify install` it'll re-install dependencies. 
-- Page loads as a 404 - Make sure you are running in a folder with `mint.json` diff --git a/docs/api-reference.mdx b/docs/api-reference.mdx deleted file mode 100644 index e3d89e123..000000000 --- a/docs/api-reference.mdx +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: 'API Reference' -description: 'Build, scale, and manage user-facing Retrieval-Augmented Generation applications.' -icon: 'message-code' -redirect: api-reference/introduction ---- diff --git a/docs/api-reference/endpoint/add_prompt.mdx b/docs/api-reference/endpoint/add_prompt.mdx deleted file mode 100644 index f0e9b6714..000000000 --- a/docs/api-reference/endpoint/add_prompt.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Add Prompt' -openapi: 'POST /v2/add_prompt' ---- diff --git a/docs/api-reference/endpoint/add_user_to_collection.mdx b/docs/api-reference/endpoint/add_user_to_collection.mdx deleted file mode 100644 index 0453db573..000000000 --- a/docs/api-reference/endpoint/add_user_to_collection.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Add User to Collection' -openapi: 'POST /v2/add_user_to_collection' ---- diff --git a/docs/api-reference/endpoint/agent.mdx b/docs/api-reference/endpoint/agent.mdx deleted file mode 100644 index 9171f27c6..000000000 --- a/docs/api-reference/endpoint/agent.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Agent' -openapi: 'POST /v2/agent' ---- diff --git a/docs/api-reference/endpoint/analytics.mdx b/docs/api-reference/endpoint/analytics.mdx deleted file mode 100644 index 5fa905125..000000000 --- a/docs/api-reference/endpoint/analytics.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Analytics' -openapi: 'GET /v2/analytics' ---- diff --git a/docs/api-reference/endpoint/app_settings.mdx b/docs/api-reference/endpoint/app_settings.mdx deleted file mode 100644 index 20ef742f0..000000000 --- a/docs/api-reference/endpoint/app_settings.mdx +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: 'App Settings' -openapi: 'GET /v2/app_settings' ---- - -Gets the latest settings for the application logic. 
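Note: the `check-typing-imports` pre-commit hook added above greps for `from typing import Dict` / `List` and fails with the message "Please import dict/list instead of Dict/List from typing". A minimal sketch of the convention it enforces is below; the `chunk_counts` helper is hypothetical and only illustrative, and the built-in generics it uses require Python 3.9+ (PEP 585).

```python
# Flagged by the new hook:
#   from typing import Dict, List
#   def chunk_counts(chunks: List[str]) -> Dict[str, int]: ...

# Preferred style (built-in generics, no typing.Dict/List import):
def chunk_counts(chunks: list[str]) -> dict[str, int]:
    """Count how many times each chunk string appears."""
    counts: dict[str, int] = {}
    for chunk in chunks:
        counts[chunk] = counts.get(chunk, 0) + 1
    return counts
```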
diff --git a/docs/api-reference/endpoint/assign_document_to_collection.mdx b/docs/api-reference/endpoint/assign_document_to_collection.mdx deleted file mode 100644 index 2d49e6647..000000000 --- a/docs/api-reference/endpoint/assign_document_to_collection.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Assign Document to Collection' -openapi: 'POST /v2/assign_document_to_collection' ---- diff --git a/docs/api-reference/endpoint/change_password.mdx b/docs/api-reference/endpoint/change_password.mdx deleted file mode 100644 index 3d596f9a9..000000000 --- a/docs/api-reference/endpoint/change_password.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Change Password' -openapi: 'POST /v2/change_password' ---- diff --git a/docs/api-reference/endpoint/collections_overview.mdx b/docs/api-reference/endpoint/collections_overview.mdx deleted file mode 100644 index 3f2da43e4..000000000 --- a/docs/api-reference/endpoint/collections_overview.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Collections Overview' -openapi: 'GET /v2/collections_overview' ---- diff --git a/docs/api-reference/endpoint/communities.mdx b/docs/api-reference/endpoint/communities.mdx deleted file mode 100644 index 5d3165f3e..000000000 --- a/docs/api-reference/endpoint/communities.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Communities' -openapi: 'GET /v2/communities' ---- diff --git a/docs/api-reference/endpoint/completion.mdx b/docs/api-reference/endpoint/completion.mdx deleted file mode 100644 index 2d1317831..000000000 --- a/docs/api-reference/endpoint/completion.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Completion' -openapi: 'GET /v2/completion' ---- diff --git a/docs/api-reference/endpoint/create_collection.mdx b/docs/api-reference/endpoint/create_collection.mdx deleted file mode 100644 index 731d46bad..000000000 --- a/docs/api-reference/endpoint/create_collection.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Create Collection' -openapi: 'POST /v2/create_collection' ---- diff --git a/docs/api-reference/endpoint/create_graph.mdx b/docs/api-reference/endpoint/create_graph.mdx deleted file mode 100644 index 0c6b910a1..000000000 --- a/docs/api-reference/endpoint/create_graph.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Create Graph' -openapi: 'POST /v2/create_graph' ---- diff --git a/docs/api-reference/endpoint/create_vector_index.mdx b/docs/api-reference/endpoint/create_vector_index.mdx deleted file mode 100644 index d67c705e9..000000000 --- a/docs/api-reference/endpoint/create_vector_index.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: Create Vector Index -openapi: 'POST /v2/create_vector_index' ---- diff --git a/docs/api-reference/endpoint/deduplicate_entities.mdx b/docs/api-reference/endpoint/deduplicate_entities.mdx deleted file mode 100644 index 556705132..000000000 --- a/docs/api-reference/endpoint/deduplicate_entities.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: Deduplicate Entities -openapi: 'POST /v2/deduplicate_entities' ---- diff --git a/docs/api-reference/endpoint/delete.mdx b/docs/api-reference/endpoint/delete.mdx deleted file mode 100644 index 949c20e31..000000000 --- a/docs/api-reference/endpoint/delete.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Delete' -openapi: 'DELETE /v2/delete' ---- diff --git a/docs/api-reference/endpoint/delete_collection.mdx b/docs/api-reference/endpoint/delete_collection.mdx deleted file mode 100644 index 6af466930..000000000 --- a/docs/api-reference/endpoint/delete_collection.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Delete Collection' -openapi: 'DELETE 
/v2/delete_collection/{collection_id}' ---- diff --git a/docs/api-reference/endpoint/delete_entities_and_triples.mdx b/docs/api-reference/endpoint/delete_entities_and_triples.mdx deleted file mode 100644 index 96fb9c37e..000000000 --- a/docs/api-reference/endpoint/delete_entities_and_triples.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Delete graph for collection' -openapi: 'DELETE /v2/delete_graph_for_collection' ---- diff --git a/docs/api-reference/endpoint/delete_prompt.mdx b/docs/api-reference/endpoint/delete_prompt.mdx deleted file mode 100644 index fb857d5ab..000000000 --- a/docs/api-reference/endpoint/delete_prompt.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Delete Prompt' -openapi: 'DELETE /v2/delete_prompt/{prompt_name}' ---- diff --git a/docs/api-reference/endpoint/delete_user_info.mdx b/docs/api-reference/endpoint/delete_user_info.mdx deleted file mode 100644 index eba06a0c0..000000000 --- a/docs/api-reference/endpoint/delete_user_info.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Delete User' -openapi: 'DELETE /v2/user/{user_id}' ---- diff --git a/docs/api-reference/endpoint/delete_vector_index.mdx b/docs/api-reference/endpoint/delete_vector_index.mdx deleted file mode 100644 index 47c1ee5e6..000000000 --- a/docs/api-reference/endpoint/delete_vector_index.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: Delete Vector Indices -openapi: 'DELETE /v2/delete_vector_indices' ---- diff --git a/docs/api-reference/endpoint/document_chunks.mdx b/docs/api-reference/endpoint/document_chunks.mdx deleted file mode 100644 index f3b8a66e3..000000000 --- a/docs/api-reference/endpoint/document_chunks.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Document Chunks' -openapi: 'GET /v2/document_chunks/{document_id}' ---- diff --git a/docs/api-reference/endpoint/documents_overview.mdx b/docs/api-reference/endpoint/documents_overview.mdx deleted file mode 100644 index 96bcf189b..000000000 --- a/docs/api-reference/endpoint/documents_overview.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Documents Overview' -openapi: 'GET /v2/documents_overview' ---- diff --git a/docs/api-reference/endpoint/enrich_graph.mdx b/docs/api-reference/endpoint/enrich_graph.mdx deleted file mode 100644 index b52f569b8..000000000 --- a/docs/api-reference/endpoint/enrich_graph.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Enrich Graph' -openapi: 'POST /v2/enrich_graph' ---- diff --git a/docs/api-reference/endpoint/entities.mdx b/docs/api-reference/endpoint/entities.mdx deleted file mode 100644 index fcf980626..000000000 --- a/docs/api-reference/endpoint/entities.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Entities' -openapi: 'GET /v2/entities' ---- diff --git a/docs/api-reference/endpoint/get_all_prompts.mdx b/docs/api-reference/endpoint/get_all_prompts.mdx deleted file mode 100644 index 30213cb3c..000000000 --- a/docs/api-reference/endpoint/get_all_prompts.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Get All Prompts' -openapi: 'GET /v2/get_all_prompts' ---- diff --git a/docs/api-reference/endpoint/get_collection.mdx b/docs/api-reference/endpoint/get_collection.mdx deleted file mode 100644 index a113fbba7..000000000 --- a/docs/api-reference/endpoint/get_collection.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Get Collection' -openapi: 'GET /v2/get_collection/{collection_id}' ---- diff --git a/docs/api-reference/endpoint/get_collections_for_user.mdx b/docs/api-reference/endpoint/get_collections_for_user.mdx deleted file mode 100644 index 0e8599694..000000000 --- 
a/docs/api-reference/endpoint/get_collections_for_user.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Get Collections for User' -openapi: 'GET /v2/user_collections/{user_id}' ---- diff --git a/docs/api-reference/endpoint/get_document_collections.mdx b/docs/api-reference/endpoint/get_document_collections.mdx deleted file mode 100644 index 97854297e..000000000 --- a/docs/api-reference/endpoint/get_document_collections.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Get Document Collections' -openapi: 'GET /v2/get_document_collections/{document_id}' ---- diff --git a/docs/api-reference/endpoint/get_prompt.mdx b/docs/api-reference/endpoint/get_prompt.mdx deleted file mode 100644 index 7c83aa913..000000000 --- a/docs/api-reference/endpoint/get_prompt.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Get Prompt' -openapi: 'GET /v2/get_prompt/{prompt_name}' ---- diff --git a/docs/api-reference/endpoint/get_user_info.mdx b/docs/api-reference/endpoint/get_user_info.mdx deleted file mode 100644 index 16ca7d442..000000000 --- a/docs/api-reference/endpoint/get_user_info.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Get User' -openapi: 'GET /v2/user' ---- diff --git a/docs/api-reference/endpoint/get_users_in_collection.mdx b/docs/api-reference/endpoint/get_users_in_collection.mdx deleted file mode 100644 index 3685ce6ab..000000000 --- a/docs/api-reference/endpoint/get_users_in_collection.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Get Users in Collection' -openapi: 'GET /v2/get_users_in_collection/{collection_id}' ---- diff --git a/docs/api-reference/endpoint/health.mdx b/docs/api-reference/endpoint/health.mdx deleted file mode 100644 index 0ff5dc39c..000000000 --- a/docs/api-reference/endpoint/health.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Health' -openapi: 'GET /v2/health' ---- diff --git a/docs/api-reference/endpoint/ingest_chunks.mdx b/docs/api-reference/endpoint/ingest_chunks.mdx deleted file mode 100644 index 0264de4f7..000000000 --- a/docs/api-reference/endpoint/ingest_chunks.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Ingest Chunks' -openapi: 'POST /v2/ingest_chunks' ---- diff --git a/docs/api-reference/endpoint/ingest_files.mdx b/docs/api-reference/endpoint/ingest_files.mdx deleted file mode 100644 index f5ffbd393..000000000 --- a/docs/api-reference/endpoint/ingest_files.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Ingest Files' -openapi: 'POST /v2/ingest_files' ---- diff --git a/docs/api-reference/endpoint/list_collections.mdx b/docs/api-reference/endpoint/list_collections.mdx deleted file mode 100644 index 28f955bcc..000000000 --- a/docs/api-reference/endpoint/list_collections.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'List Collections' -openapi: 'GET /v2/list_collections' ---- diff --git a/docs/api-reference/endpoint/list_vector_indices.mdx b/docs/api-reference/endpoint/list_vector_indices.mdx deleted file mode 100644 index cc803de0b..000000000 --- a/docs/api-reference/endpoint/list_vector_indices.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: List Vector Indices -openapi: 'GET /v2/list_vector_indices' ---- diff --git a/docs/api-reference/endpoint/login.mdx b/docs/api-reference/endpoint/login.mdx deleted file mode 100644 index 0c3c05a2c..000000000 --- a/docs/api-reference/endpoint/login.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Login' -openapi: 'POST /v2/login' ---- diff --git a/docs/api-reference/endpoint/logout.mdx b/docs/api-reference/endpoint/logout.mdx deleted file mode 100644 index 53e27e3e5..000000000 --- a/docs/api-reference/endpoint/logout.mdx +++ 
/dev/null @@ -1,4 +0,0 @@ ---- -title: 'Logout' -openapi: 'POST /v2/logout' ---- diff --git a/docs/api-reference/endpoint/logs.mdx b/docs/api-reference/endpoint/logs.mdx deleted file mode 100644 index 7133a62e4..000000000 --- a/docs/api-reference/endpoint/logs.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Logs' -openapi: 'GET /v2/logs' ---- diff --git a/docs/api-reference/endpoint/put_user_info.mdx b/docs/api-reference/endpoint/put_user_info.mdx deleted file mode 100644 index 720d96bb0..000000000 --- a/docs/api-reference/endpoint/put_user_info.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Update User' -openapi: 'PUT /v2/user' ---- diff --git a/docs/api-reference/endpoint/rag.mdx b/docs/api-reference/endpoint/rag.mdx deleted file mode 100644 index 961859653..000000000 --- a/docs/api-reference/endpoint/rag.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'RAG' -openapi: 'POST /v2/rag' ---- diff --git a/docs/api-reference/endpoint/refresh_access_token.mdx b/docs/api-reference/endpoint/refresh_access_token.mdx deleted file mode 100644 index d9c797d22..000000000 --- a/docs/api-reference/endpoint/refresh_access_token.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Refresh Access Token' -openapi: 'POST /v2/refresh_access_token' ---- diff --git a/docs/api-reference/endpoint/register.mdx b/docs/api-reference/endpoint/register.mdx deleted file mode 100644 index 099625b81..000000000 --- a/docs/api-reference/endpoint/register.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Register' -openapi: 'POST /v2/register' ---- diff --git a/docs/api-reference/endpoint/remove_document_from_collection.mdx b/docs/api-reference/endpoint/remove_document_from_collection.mdx deleted file mode 100644 index a4d99bfe5..000000000 --- a/docs/api-reference/endpoint/remove_document_from_collection.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Remove Document from Collection' -openapi: 'POST /v2/remove_document_from_collection' ---- diff --git a/docs/api-reference/endpoint/remove_user_from_collection.mdx b/docs/api-reference/endpoint/remove_user_from_collection.mdx deleted file mode 100644 index 77b5f9948..000000000 --- a/docs/api-reference/endpoint/remove_user_from_collection.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Remove User from Collection' -openapi: 'POST /v2/remove_user_from_collection' ---- diff --git a/docs/api-reference/endpoint/request_password_reset.mdx b/docs/api-reference/endpoint/request_password_reset.mdx deleted file mode 100644 index 6ca91376d..000000000 --- a/docs/api-reference/endpoint/request_password_reset.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Password Reset' -openapi: 'POST /v2/request_password_reset' ---- diff --git a/docs/api-reference/endpoint/score_completion.mdx b/docs/api-reference/endpoint/score_completion.mdx deleted file mode 100644 index 7e2f735a2..000000000 --- a/docs/api-reference/endpoint/score_completion.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Score Completion' -openapi: 'POST /v2/score_completion' ---- diff --git a/docs/api-reference/endpoint/search.mdx b/docs/api-reference/endpoint/search.mdx deleted file mode 100644 index e4136091c..000000000 --- a/docs/api-reference/endpoint/search.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Search' -openapi: 'POST /v2/search' ---- diff --git a/docs/api-reference/endpoint/triples.mdx b/docs/api-reference/endpoint/triples.mdx deleted file mode 100644 index 98681bc76..000000000 --- a/docs/api-reference/endpoint/triples.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Triples' -openapi: 'GET /v2/triples' ---- diff --git 
a/docs/api-reference/endpoint/update_collection.mdx b/docs/api-reference/endpoint/update_collection.mdx deleted file mode 100644 index a8f7d03a5..000000000 --- a/docs/api-reference/endpoint/update_collection.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Update Collection' -openapi: 'PUT /v2/update_collection' ---- diff --git a/docs/api-reference/endpoint/update_files.mdx b/docs/api-reference/endpoint/update_files.mdx deleted file mode 100644 index a062167c3..000000000 --- a/docs/api-reference/endpoint/update_files.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Update Files' -openapi: 'POST /v2/update_files' ---- diff --git a/docs/api-reference/endpoint/update_prompt.mdx b/docs/api-reference/endpoint/update_prompt.mdx deleted file mode 100644 index 976672219..000000000 --- a/docs/api-reference/endpoint/update_prompt.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Update Prompt' -openapi: 'POST /v2/update_prompt' ---- diff --git a/docs/api-reference/endpoint/users_overview.mdx b/docs/api-reference/endpoint/users_overview.mdx deleted file mode 100644 index 407b62c3d..000000000 --- a/docs/api-reference/endpoint/users_overview.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Users Overview' -openapi: 'GET /v2/users_overview' ---- diff --git a/docs/api-reference/endpoint/verify_email.mdx b/docs/api-reference/endpoint/verify_email.mdx deleted file mode 100644 index e4e9e8004..000000000 --- a/docs/api-reference/endpoint/verify_email.mdx +++ /dev/null @@ -1,4 +0,0 @@ ---- -title: 'Verify Email' -openapi: 'POST /v2/verify_email' ---- diff --git a/docs/api-reference/introduction.mdx b/docs/api-reference/introduction.mdx deleted file mode 100644 index c97692c13..000000000 --- a/docs/api-reference/introduction.mdx +++ /dev/null @@ -1,39 +0,0 @@ ---- -title: 'R2R API Reference' -description: 'Powerful document ingestion, search, and RAG capabilities at your fingertips' ---- - -## Welcome to the R2R API - -R2R (RAG to Riches) is a powerful library that offers both methods and a REST API for document ingestion, Retrieval-Augmented Generation (RAG), evaluation, and additional features like observability, analytics, and document management. This API documentation will guide you through the various endpoints and functionalities R2R provides. - - - This API documentation is designed to help developers integrate R2R's capabilities into their applications efficiently. Whether you're building a search engine, a question-answering system, or a document management solution, the R2R API has you covered. - - -## Key Features - -R2R API offers a wide range of features, including: - - - Document Ingestion and Management - - AI-Powered Search (Vector, Hybrid, and Knowledge Graph) - - Retrieval-Augmented Generation (RAG) - - User Auth & Management - - Observability and Analytics - - - View the R2R source code and contribute - - -## Getting Started - -To get started with the R2R API, you'll need to: - -1. Install R2R in your environment -2. Run the server with `r2r serve`, or customize your FastAPI for production settings. - -For detailed installation and setup instructions, please refer to our [Installation Guide](/documentation/installation). 
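Note: the deleted `introduction.mdx` above tells readers to install R2R and start the server with `r2r serve`, and the Python snippets in the deleted `openapi.json` below show the matching client calls. A minimal sketch of that quickstart flow, assuming a local server at `http://localhost:7272` as in those samples:

```python
# Sketch of the quickstart described in the removed docs, using the client
# calls from the removed OpenAPI x-codeSamples. Assumes an R2R server is
# already running locally (e.g. started with `r2r serve`).
from r2r import R2RClient

client = R2RClient("http://localhost:7272")
# when using auth, do client.login(...)

# Ingest a local file with optional metadata.
ingest_result = client.ingest_files(
    file_paths=["pg_essay_1.html"],
    metadatas=[{"metadata_1": "some random metadata"}],
)

# Run a hybrid vector search over the ingested documents.
search_result = client.search(
    query="Who is Aristotle?",
    vector_search_settings={
        "use_vector_search": True,
        "search_limit": 20,
        "use_hybrid_search": True,
    },
)
print(search_result)
```

The `ingest_files` and `search` arguments mirror the samples in the removed spec; the additional settings shown there (knowledge-graph search, document filters) are optional and omitted here.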
diff --git a/docs/api-reference/openapi.json b/docs/api-reference/openapi.json deleted file mode 100644 index fe7791b66..000000000 --- a/docs/api-reference/openapi.json +++ /dev/null @@ -1 +0,0 @@ -{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/v2/ingest_files":{"post":{"summary":"Ingest Files App","description":"Ingest files into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. More expansive collection permissioning is under development.","operationId":"ingest_files_app_v2_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_v2_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_files(\n file_paths=[\"pg_essay_1.html\", \"got.txt\"],\n metadatas=[{\"metadata_1\":\"some random metadata\"}, {\"metadata_2\": \"some other random metadata\"}],\n document_ids=None\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1.html;type=text/html\" \\\n -F \"file=@got.txt;type=text/plain\" \\\n -F 'metadatas=[{},{}]' \\\n -F 'document_ids=null'\n"}]}},"/v2/update_files":{"post":{"summary":"Update Files App","description":"Update existing files in the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only update their own files. 
More expansive collection permissioning is under development.","operationId":"update_files_app_v2_update_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_v2_update_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.update_files(\n file_paths=[\"pg_essay_1_v2.txt\"],\n document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/update_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/ingest_chunks":{"post":{"summary":"Ingest Chunks App","description":"Ingest text chunks into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest pre-parsed text chunks into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. More expansive collection permissioning is under development.","operationId":"ingest_chunks_app_v2_ingest_chunks_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_ingest_chunks_app_v2_ingest_chunks_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_chunks(\n chunks=[\n {\n \"text\": \"Another chunk of text\",\n },\n {\n \"text\": \"Yet another chunk of text\",\n },\n {\n \"text\": \"A chunk of text\",\n },\n ],\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_chunks\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"chunks\": [\n {\n \"text\": \"Another chunk of text\"\n },\n {\n \"text\": \"Yet another chunk of text\"\n },\n {\n \"text\": \"A chunk of text\"\n }\n ],\n \"document_id\": \"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\",\n \"metadata\": {}\n }'\n"}]}},"/v2/update_chunk/{document_id}/{extraction_id}":{"put":{"summary":"Update Chunk App","operationId":"update_chunk_app_v2_update_chunk__document_id___extraction_id__put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","description":"The document ID of the chunk to update","title":"Document Id"},"description":"The document ID of the chunk to update"},{"name":"extraction_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","description":"The extraction ID of the chunk to update","title":"Extraction 
Id"},"description":"The extraction ID of the chunk to update"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_chunk_app_v2_update_chunk__document_id___extraction_id__put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_vector_index":{"post":{"summary":"Create Vector Index App","description":"Create a vector index for a given table.","operationId":"create_vector_index_app_v2_create_vector_index_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_vector_index_app_v2_create_vector_index_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CreateVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.create_vector_index(\n table_name=\"vectors\",\n index_method=\"hnsw\",\n index_measure=\"cosine_distance\",\n index_arguments={\"m\": 16, \"ef_construction\": 64},\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"http://localhost:7276/v2/create_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"table_name\": \"vectors\",\n \"index_method\": \"hnsw\",\n \"index_measure\": \"cosine_distance\",\n \"index_arguments\": {\n \"m\": 16,\n \"ef_construction\": 64\n },\n \"concurrently\": true\n }'\n"}]}},"/v2/list_vector_indices":{"get":{"summary":"List Vector Indices App","operationId":"list_vector_indices_app_v2_list_vector_indices_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"table_name","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The table to create the index on. Default: vectors","default":"vectors","title":"Table Name"},"description":"The table to create the index on. 
Default: vectors"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ListVectorIndicesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.create_vector_index(\n table_name=\"vectors\",\n index_method=\"hnsw\",\n index_measure=\"cosine_distance\",\n index_arguments={\"m\": 16, \"ef_construction\": 64},\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"http://localhost:7276/v2/create_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"table_name\": \"vectors\",\n \"index_method\": \"hnsw\",\n \"index_measure\": \"cosine_distance\",\n \"index_arguments\": {\n \"m\": 16,\n \"ef_construction\": 64\n },\n \"concurrently\": true\n }'\n"}]}},"/v2/delete_vector_index":{"delete":{"summary":"Delete Vector Index App","operationId":"delete_vector_index_app_v2_delete_vector_index_delete","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_vector_index_app_v2_delete_vector_index_delete"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_DeleteVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.delete_vector_index(\n index_name=\"ix_vector_cosine_ops_hnsw__20241021211541\",\n table_name=\"vectors\",\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X DELETE \"http://localhost:7276/v2/delete_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"index_name\": \"ix_vector_cosine_ops_hnsw__20241021211541\",\n \"table_name\": \"vectors\",\n \"concurrently\": true\n }'\n"}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt 
App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs 
App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document 
Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","minimum":-1,"description":"Number of items to return. Use -1 to return all items.","default":100,"title":"Limit"},"description":"Number of items to return. Use -1 to return all items."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}},{"name":"include_vectors","in":"query","required":false,"schema":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Include Vectors"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection 
App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection 
App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection 
App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/conversations_overview":{"get":{"summary":"Conversations Overview App","operationId":"conversations_overview_app_v2_conversations_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Conversation Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_ConversationOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_conversation/{conversation_id}":{"get":{"summary":"Get 
Conversation","operationId":"get_conversation_v2_get_conversation__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"},{"name":"branch_id","in":"query","required":false,"schema":{"type":"string","description":"Branch ID","title":"Branch Id"},"description":"Branch ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_Tuple_str__Message___"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_conversation":{"post":{"summary":"Create Conversation","operationId":"create_conversation_v2_create_conversation_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Create Conversation V2 Create Conversation Post"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_message/{conversation_id}":{"post":{"summary":"Add Message","operationId":"add_message_v2_add_message__conversation_id__post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_message_v2_add_message__conversation_id__post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Add Message V2 Add Message Conversation Id Post"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_message/{message_id}":{"put":{"summary":"Edit Message","operationId":"edit_message_v2_update_message__message_id__put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"message_id","in":"path","required":true,"schema":{"type":"string","description":"Message ID","title":"Message Id"},"description":"Message ID"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"string","description":"New content","title":"Message"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Edit Message V2 Update Message Message Id Put"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/branches_overview/{conversation_id}":{"get":{"summary":"Branches Overview","operationId":"branches_overview_v2_branches_overview__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Branches Overview V2 Branches Overview Conversation Id Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete_conversation/{conversation_id}":{"delete":{"summary":"Delete 
Conversation","operationId":"delete_conversation_v2_delete_conversation__conversation_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for 
precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. 
It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philosopher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philosopher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philosopher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philosopher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/completion":{"post":{"summary":"Completion","description":"Generate completions for a list of messages.\n\nThis endpoint uses the language model to generate completions for the provided messages.\nThe generation process can be customized using the generation_config parameter.","operationId":"completion_v2_completion_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_completion_v2_completion_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and 
password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to 
delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings.\nIf document IDs are not provided, the graph will be created on all documents in the system.\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information.\nIt creates communities of nodes based on their similarity and adds embeddings to the graph.\nThis step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/entities":{"get":{"summary":"Get Entities","description":"Retrieve entities from the knowledge graph.","operationId":"get_entities_v2_entities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"entity_level","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/EntityLevel"},{"type":"null"}],"description":"Type of entities to retrieve. Options are: raw, dedup_document, dedup_collection.","default":"document","title":"Entity Level"},"description":"Type of entities to retrieve. 
Options are: raw, dedup_document, dedup_collection."},{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve entities from.","title":"Collection Id"},"description":"Collection ID to retrieve entities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity IDs to filter by.","title":"Entity Ids"},"description":"Entity IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEntitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/triples":{"get":{"summary":"Get Triples","description":"Retrieve triples from the knowledge graph.","operationId":"get_triples_v2_triples_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve triples from.","title":"Collection Id"},"description":"Collection ID to retrieve triples from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_names","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity names to filter by.","title":"Entity Names"},"description":"Entity names to filter by."},{"name":"triple_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Triple IDs to filter by.","title":"Triple Ids"},"description":"Triple IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTriplesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/communities":{"get":{"summary":"Get Communities","description":"Retrieve communities from the knowledge graph.","operationId":"get_communities_v2_communities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for 
pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"levels","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Levels to filter by.","title":"Levels"},"description":"Levels to filter by."},{"name":"community_numbers","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Community numbers to filter by.","title":"Community Numbers"},"description":"Community numbers to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCommunitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/deduplicate_entities":{"post":{"summary":"Deduplicate Entities","description":"Deduplicate entities in the knowledge graph.","operationId":"deduplicate_entities_v2_deduplicate_entities_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_deduplicate_entities_v2_deduplicate_entities_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGEntityDeduplicationResponse__KGDeduplicationEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/tuned_prompt":{"get":{"summary":"Get Tuned Prompt","description":"Auto-tune the prompt for a specific collection.","operationId":"get_tuned_prompt_v2_tuned_prompt_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"query","required":true,"schema":{"type":"string","description":"The name of the prompt to tune. Valid options are 'kg_triples_extraction_prompt', 'kg_entity_description_prompt' and 'community_reports_prompt'.","title":"Prompt Name"},"description":"The name of the prompt to tune. 
Valid options are 'kg_triples_extraction_prompt', 'kg_entity_description_prompt' and 'community_reports_prompt'."},{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"documents_offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Offset for document pagination.","default":0,"title":"Documents Offset"},"description":"Offset for document pagination."},{"name":"documents_limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Limit for document pagination.","default":100,"title":"Documents Limit"},"description":"Limit for document pagination."},{"name":"chunks_offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Offset for chunk pagination.","default":0,"title":"Chunks Offset"},"description":"Offset for chunk pagination."},{"name":"chunks_limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Limit for chunk pagination.","default":100,"title":"Chunks Limit"},"description":"Limit for chunk pagination."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTunePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete_graph_for_collection":{"delete":{"summary":"Delete Graph For Collection","description":"Delete the graph for a given collection. Note that this endpoint may delete a large amount of data created by the KG pipeline, this deletion is irreversible, and recreating the graph may be an expensive operation.\n\nNotes:\nThe endpoint deletes all communities for a given collection. If the cascade flag is set to true, the endpoint also deletes all the entities and triples associated with the collection.\n\nWARNING: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. 
Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection.","operationId":"delete_graph_for_collection_v2_delete_graph_for_collection_delete","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"BaseModel":{"properties":{},"type":"object","title":"BaseModel"},"Body_add_message_v2_add_message__conversation_id__post":{"properties":{"message":{"$ref":"#/components/schemas/Message","description":"Message content"},"parent_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Parent Id","description":"Parent message ID"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata","description":"Metadata"}},"type":"object","required":["message"],"title":"Body_add_message_v2_add_message__conversation_id__post"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"message":{"anyOf":[{"$ref":"#/components/schemas/Message"},{"type":"null"}],"description":"Message object"},"messages":{"anyOf":[{"items":{"$ref":"#/components/schemas/Message"},"type":"array"},{"type":"null"}],"title":"Messages","description":"List of message objects (deprecated, use message instead)","deprecated":true},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search 
settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true},"conversation_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Conversation Id","description":"The ID of the conversation, a new ID is assigned if not provided"},"branch_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Branch Id","description":"The ID of the branch"}},"type":"object","title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_completion_v2_completion_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"The messages to complete"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"The generation config"}},"type":"object","required":["messages"],"title":"Body_completion_v2_completion_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to create graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph creation process."},"kg_creation_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_create_vector_index_app_v2_create_vector_index_post":{"properties":{"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The table to create the index on. Default: vectors","default":"vectors"},"index_method":{"$ref":"#/components/schemas/IndexMethod","description":"The indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw","default":"hnsw"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. 
Default: cosine_distance","default":"cosine_distance"},"index_arguments":{"anyOf":[{"$ref":"#/components/schemas/IndexArgsIVFFlat"},{"$ref":"#/components/schemas/IndexArgsHNSW"},{"type":"null"}],"title":"Index Arguments","description":"Configuration parameters for the chosen index method. For HNSW: {m: int, ef_construction: int}. For IVFFlat: {n_lists: int}"},"index_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Index Name","description":"Optional custom name for the index. If not provided, one will be auto-generated"},"index_column":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Index Column","description":"The column containing the vectors to index. Default: `vec`, or `vec_binary` when using hamming or jaccard distance."},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to create the index concurrently. Default: true","default":true}},"type":"object","title":"Body_create_vector_index_app_v2_create_vector_index_post"},"Body_deduplicate_entities_v2_deduplicate_entities_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to deduplicate entities for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the deduplication process."},"deduplication_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Deduplication Settings","description":"Settings for the deduplication process."}},"type":"object","title":"Body_deduplicate_entities_v2_deduplicate_entities_post"},"Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id","description":"Collection ID to delete graph for."},"cascade":{"type":"boolean","title":"Cascade","description":"Whether to cascade the deletion, and delete entities and triples belonging to the collection.","default":false}},"type":"object","required":["collection_id"],"title":"Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_delete_vector_index_app_v2_delete_vector_index_delete":{"properties":{"index_name":{"type":"string","title":"Index Name","description":"The name of the index to delete"},"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The name of the table containing the index. Default: vectors","default":"vectors"},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to delete the index concurrently. 
Default: true","default":true}},"type":"object","required":["index_name"],"title":"Body_delete_vector_index_app_v2_delete_vector_index_delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to enrich graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_chunks_app_v2_ingest_chunks_post":{"properties":{"chunks":{"items":{"$ref":"#/components/schemas/RawChunk"},"type":"array","title":"Chunks","description":"A list of text chunks to ingest into the system.","default":{}},"document_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document Id","description":"An optional document id to associate the chunks with. If not provided, a unique document id will be generated."},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","description":"Whether or not ingestion runs with orchestration, default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result.","default":true}},"type":"object","title":"Body_ingest_chunks_app_v2_ingest_chunks_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","description":"Whether or not ingestion runs with orchestration, default is `True`. 
When set to `False`, the ingestion process will run synchronous and directly return the result.","default":true}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id","description":"ID of the user to update"},"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"is_superuser":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Superuser","description":"Updated superuser status"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection 
ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_chunk_app_v2_update_chunk__document_id___extraction_id__put":{"properties":{"text":{"type":"string","title":"Text","description":"The new text content for the chunk"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional updated metadata"},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","default":true}},"type":"object","required":["text"],"title":"Body_update_chunk_app_v2_update_chunk__document_id___extraction_id__put"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","description":"Whether or not ingestion runs with orchestration, default is `True`. 
When set to `False`, the ingestion process will run synchronous and directly return the result.","default":true}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"CollectionOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"CollectionOverviewResponse"},"CollectionResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"CollectionResponse"},"CommunityReport":{"properties":{"community_number":{"type":"integer","title":"Community Number"},"level":{"type":"integer","title":"Level"},"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name","default":""},"summary":{"type":"string","title":"Summary","default":""},"findings":{"items":{"type":"string"},"type":"array","title":"Findings","default":[]},"rating":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Rating"},"rating_explanation":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rating Explanation"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Embedding"},"attributes":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["community_number","level","collection_id"],"title":"CommunityReport"},"ConversationOverviewResponse":{"properties":{"conversation_id":{"type":"string","format":"uuid","title":"Conversation Id"},"created_at":{"type":"string","format":"date-time","title":"Created At"}},"type":"object","required":["conversation_id","created_at"],"title":"ConversationOverviewResponse"},"CreateVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message","description":"Vector index creation queued 
successfully."}},"type":"object","required":["message"],"title":"CreateVectorIndexResponse"},"DeleteVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"DeleteVectorIndexResponse"},"DocumentChunkResponse":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"},"vector":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Vector"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"document_type":{"type":"string","title":"Document Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"kg_extraction_status":{"type":"string","title":"Kg Extraction Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","document_type","created_at","updated_at","ingestion_status","kg_extraction_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"Entity":{"properties":{"name":{"type":"string","title":"Name"},"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"category":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Category"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"description_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string"},{"type":"null"}],"title":"Description Embedding"},"community_numbers":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Community Numbers"},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id"},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"document_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Document Ids"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["name"],"title":"Entity","description":"An entity extracted from a document."},"EntityLevel":{"type":"string","enum":["collection","document","chunk"],"title":"EntityLevel"},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To 
Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"},"response_format":{"anyOf":[{"type":"object"},{"$ref":"#/components/schemas/BaseModel"},{"type":"null"}],"title":"Response Format"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexArgsHNSW":{"properties":{"m":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"M","default":16},"ef_construction":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Construction","default":64}},"type":"object","title":"IndexArgsHNSW","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an HNSW type index.\n\nRef: https://github.com/pgvector/pgvector#index-options\n\nBoth attributes are Optional in case the user only wants to specify one and\nleave the other as default\n\nAttributes:\n m (int): Maximum number of connections per node per layer (default: 16)\n ef_construction (int): Size of the dynamic candidate list for\n constructing the graph (default: 64)"},"IndexArgsIVFFlat":{"properties":{"n_lists":{"type":"integer","title":"N Lists"}},"type":"object","required":["n_lists"],"title":"IndexArgsIVFFlat","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an IVFFlat type index.\n\nAttributes:\n nlist (int): The number of IVF centroids that the index should use"},"IndexMeasure":{"type":"string","enum":["l2_distance","max_inner_product","cosine_distance","l1_distance","hamming_distance","jaccard_distance"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IndexMethod":{"type":"string","enum":["auto","ivfflat","hnsw"],"title":"IndexMethod","description":"An enum representing the index methods available.\n\nThis class currently only supports the 'ivfflat' method but may\nexpand in the future.\n\nAttributes:\n auto (str): Automatically choose the 
best available index method.\n ivfflat (str): The ivfflat index method.\n hnsw (str): The hnsw index method."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCommunitiesResponse":{"properties":{"communities":{"items":{"$ref":"#/components/schemas/CommunityReport"},"type":"array","title":"Communities","description":"The list of communities in the graph for the collection."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of communities in the graph."}},"type":"object","required":["communities","total_entries"],"title":"KGCommunitiesResponse","description":"Response for knowledge graph communities.","example":{"communities":[{"collection_ids":["122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"],"community_number":1,"findings":["finding1","finding2"],"id":"1","level":0,"name":"community name","rating":"8","rating_explanation":"rating explanation","summary":"community summary"}],"total_count":1}},"KGCommunityResult":{"properties":{"name":{"type":"string","title":"Name"},"summary":{"type":"string","title":"Summary"},"rating":{"type":"number","title":"Rating"},"rating_explanation":{"type":"string","title":"Rating Explanation"},"findings":{"items":{"type":"string"},"type":"array","title":"Findings"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","summary","rating","rating_explanation","findings"],"title":"KGCommunityResult","findings":["Finding 1","Finding 2"],"metadata":{},"name":"Community Name","rating":9,"rating_explanation":"Rating Explanation","summary":"Community Summary"},"KGDeduplicationEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"num_entities":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Entities","description":"The number of entities in the collection."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost in USD."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated time in minutes."}},"type":"object","title":"KGDeduplicationEstimationResponse","description":"Response for knowledge graph deduplication estimation."},"KGEntitiesResponse":{"properties":{"entities":{"items":{"$ref":"#/components/schemas/Entity"},"type":"array","title":"Entities","description":"The list of entities in the 
graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of entities in the graph for the collection or document."}},"type":"object","required":["entities","total_entries"],"title":"KGEntitiesResponse","description":"Response for knowledge graph entities.","example":{"entities":[{"description":"Description 1","id":"1","name":"Entity 1"},{"description":"Description 2","id":"2","name":"Entity 2"}],"total_entries":2}},"KGEntityDeduplicationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user."}},"type":"object","required":["message"],"title":"KGEntityDeduplicationResponse","description":"Response for knowledge graph entity deduplication.","example":{"message":"Entity deduplication queued successfully."}},"KGEntityResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGEntityResult","description":"Entity Description","metadata":{},"name":"Entity Name"},"KGGlobalResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGGlobalResult","description":"Global Result Description","metadata":{},"name":"Global Result Name"},"KGRelationshipResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGRelationshipResult","description":"Relationship Description","metadata":{},"name":"Relationship Name"},"KGRunType":{"type":"string","enum":["estimate","run"],"title":"KGRunType","description":"Type of KG run."},"KGSearchMethod":{"type":"string","enum":["local"],"const":"local","title":"KGSearchMethod"},"KGSearchResult":{"properties":{"method":{"$ref":"#/components/schemas/KGSearchMethod"},"content":{"anyOf":[{"$ref":"#/components/schemas/KGEntityResult"},{"$ref":"#/components/schemas/KGRelationshipResult"},{"$ref":"#/components/schemas/KGCommunityResult"},{"$ref":"#/components/schemas/KGGlobalResult"}],"title":"Content"},"result_type":{"anyOf":[{"$ref":"#/components/schemas/KGSearchResultType"},{"type":"null"}]},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"metadata":{"type":"object","title":"Metadata","default":{}}},"type":"object","required":["method","content"],"title":"KGSearchResult","content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"},"KGSearchResultType":{"type":"string","enum":["entity","relationship","community"],"title":"KGSearchResultType"},"KGSearchSettings":{"properties":{"entities_level":{"$ref":"#/components/schemas/EntityLevel","description":"The level of entities to search for","default":"document"},"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. 
Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include operations include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"$document_id\": ..., \"collection_ids\": ...}}`"},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"graphrag_map_system":{"type":"string","title":"Graphrag Map System","description":"The system prompt for the graphrag map prompt.","default":"graphrag_map_system"},"graphrag_reduce_system":{"type":"string","title":"Graphrag Reduce System","description":"The system prompt for the graphrag reduce prompt.","default":"graphrag_reduce_system"},"use_kg_search":{"type":"boolean","title":"Use Kg Search","description":"Whether to use KG search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","description":"KG search type","default":"local"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level","description":"KG search level"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph search."},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"local","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"KGTriplesResponse":{"properties":{"triples":{"items":{"$ref":"#/components/schemas/Triple"},"type":"array","title":"Triples","description":"The list of triples in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of triples in the graph for the collection or document."}},"type":"object","required":["triples","total_entries"],"title":"KGTriplesResponse","description":"Response for knowledge graph triples.","example":{"total_entries":2,"triples":[{"description":"Paris is the capital of France","object":"France","predicate":"is capital of","subject":"Paris"}]}},"KGTunePromptResponse":{"properties":{"tuned_prompt":{"type":"string","title":"Tuned Prompt","description":"The updated prompt."}},"type":"object","required":["tuned_prompt"],"title":"KGTunePromptResponse","description":"Response containing just the tuned prompt string.","example":{"tuned_prompt":"The updated 
prompt"}},"ListVectorIndicesResponse":{"properties":{"indices":{"items":{"type":"object"},"type":"array","title":"Indices"}},"type":"object","required":["indices"],"title":"ListVectorIndicesResponse"},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PaginatedResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[CollectionResponse]]"},"PaginatedResultsWrapper_list_ConversationOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/ConversationOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[ConversationOverviewResponse]]"},"PaginatedResultsWrapper_list_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentChunkResponse]]"},"PaginatedResultsWrapper_list_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentOverviewResponse]]"},"PaginatedResultsWrapper_list_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total 
Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserOverviewResponse]]"},"PaginatedResultsWrapper_list_UserResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/shared__api__models__management__responses__UserResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserResponse]]"},"PromptResponse":{"properties":{"prompt_id":{"type":"string","format":"uuid","title":"Prompt Id"},"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types"}},"type":"object","required":["prompt_id","name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"},"conversation_id":{"type":"string","title":"Conversation Id","description":"The conversation ID for the RAG agent response"}},"type":"object","required":["messages","conversation_id"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philospher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philospher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. 
While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. 
His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"RawChunk":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"RawChunk"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_CollectionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CollectionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CollectionResponse]"},"ResultsWrapper_CreateVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CreateVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CreateVectorIndexResponse]"},"ResultsWrapper_DeleteVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/DeleteVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[DeleteVectorIndexResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_KGCommunitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCommunitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCommunitiesResponse]"},"ResultsWrapper_KGEntitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEntitiesResponse"}},"type":"object","required":["results"],"title":"
ResultsWrapper[KGEntitiesResponse]"},"ResultsWrapper_KGTriplesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTriplesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTriplesResponse]"},"ResultsWrapper_KGTunePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTunePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTunePromptResponse]"},"ResultsWrapper_ListVectorIndicesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/ListVectorIndicesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[ListVectorIndicesResponse]"},"ResultsWrapper_NoneType_":{"properties":{"results":{"type":"null","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[NoneType]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_Union_KGEntityDeduplicationResponse__KGDeduplicationEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGEntityDeduplicationResponse"},{"$ref":"#/components/schemas/KGDeduplicationEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGEntityDeduplicationResponse, 
KGDeduplicationEstimationResponse]]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/shared__api__models__auth__responses__UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_CollectionOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionOverviewResponse]]"},"ResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionResponse]]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_list_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[LogResponse]]"},"ResultsWrapper_list_Tuple_str__Message___":{"properties":{"results":{"items":{"prefixItems":[{"type":"string"},{"$ref":"#/components/schemas/Message"}],"type":"array","maxItems":2,"minItems":2},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[Tuple[str, Message]]]"},"SearchResponse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory 
Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"Triple":{"properties":{"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"subject":{"type":"string","title":"Subject"},"predicate":{"type":"string","title":"Predicate"},"object":{"type":"string","title":"Object"},"weight":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weight","default":1.0},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"predicate_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Predicate Embedding"},"extraction_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Extraction Ids","default":[]},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"}],"title":"Attributes","default":{}}},"type":"object","required":["subject","predicate","object"],"title":"Triple","description":"A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities."},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection 
Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include operations include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"$document_id\": ..., \"collection_ids\": ...}}`"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"offset":{"type":"integer","minimum":0.0,"title":"Offset","description":"Offset to paginate search results","default":0},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"type":"integer","title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"type":"integer","title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. 
Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"$ref":"#/components/schemas/HybridSearchSettings","description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"type":"string","title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","limit":20,"offset":0,"probes":10,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true},"VectorTableName":{"type":"string","enum":["vectors","document_entity","collection_entity","community_report"],"title":"VectorTableName","description":"This enum represents the different tables where we store vectors."},"shared__api__models__auth__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-29T09:47:24.038501"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-29T09:47:24.038510"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"shared__api__models__management__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-29T09:47:24.055195"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-29T09:47:24.055198"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile 
Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} diff --git a/docs/components/GithubButtons.jsx b/docs/components/GithubButtons.jsx deleted file mode 100644 index 3b2c92980..000000000 --- a/docs/components/GithubButtons.jsx +++ /dev/null @@ -1,49 +0,0 @@ -import React, { useEffect } from 'react'; - -const GithubButtons = () => { - useEffect(() => { - const script = document.createElement('script'); - script.src = 'https://buttons.github.io/buttons.js'; - script.async = true; - script.defer = true; - document.body.appendChild(script); - - return () => { - document.body.removeChild(script); - }; - }, []); - - return ( -
- - Star - - - Watch - - - Fork - -
- ); -}; - -export default GithubButtons; diff --git a/docs/cookbooks/advanced-graphrag.mdx b/docs/cookbooks/advanced-graphrag.mdx deleted file mode 100644 index 60ed7efa1..000000000 --- a/docs/cookbooks/advanced-graphrag.mdx +++ /dev/null @@ -1,210 +0,0 @@ ---- -title: 'Advanced GraphRAG' -description: 'Advanced GraphRAG Techniques with R2R' -icon: 'chart-network' ---- - - -## Advanced GraphRAG Techniques - -R2R supports advanced GraphRAG techniques that can be easily configured at runtime. This flexibility allows you to experiment with different SoTA strategies and optimize your RAG pipeline for specific use cases. - - - -Advanced GraphRAG techniques are still a beta feature in R2R. There may be limitations in observability and analytics when implementing them. - -Are we missing an important technique? If so, then please let us know at founders@sciphi.ai. - - - - -### Prompt Tuning - -One way that we can improve upon GraphRAG's already impressive capabilities is by tuning our prompts to a specific domain. When we create a knowledge graph, an LLM extracts the relationships between entities; but for very targeted domains, a general approach may fall short. - -To demonstrate this, we can run GraphRAG over the technical papers for the 2024 Nobel Prizes in chemistry, medicine, and physics. By tuning our prompts for GraphRAG, we attempt to understand our documents at a high level, and provide the LLM with a more pointed description. - -The following script, which utilizes the Python SDK, generates the tuned prompts and calls the knowledge graph creation process with these prompts at runtime: - -```python -# Step 1: Tune the prompts for knowledge graph creation -# Tune the entity description prompt -entity_prompt_response = client.get_tuned_prompt( - prompt_name="graphrag_entity_description" -) -tuned_entity_prompt = entity_prompt_response['results']['tuned_prompt'] - -# Tune the triples extraction prompt -triples_prompt_response = client.get_tuned_prompt( - prompt_name="graphrag_triples_extraction_few_shot" -) -tuned_triples_prompt = triples_prompt_response['results']['tuned_prompt'] - -# Step 2: Create the knowledge graph -kg_settings = { - "kg_entity_description_prompt": tuned_entity_prompt -} - -# Generate the initial graph -graph_response = client.create_graph( - run_type="run", - kg_creation_settings=kg_settings -) - -# Step 3: Clean up the graph by removing duplicate entities -client.deduplicate_entities( - run_type="run", - collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09' -) - -# Step 4: Tune and apply community reports prompt for graph enrichment -community_prompt_response = client.get_tuned_prompt( - prompt_name="graphrag_community_reports" -) -tuned_community_prompt = community_prompt_response['results']['tuned_prompt'] - -# Configure enrichment settings -kg_enrichment_settings = { - "community_reports_prompt": tuned_community_prompt -} - -# Enrich the graph with additional information -client.enrich_graph( - run_type="run", - kg_enrichment_settings=kg_enrichment_settings -) -``` - -For illustrative purposes, we can look at the `graphrag_entity_description` prompt before and after prompt tuning. It's clear that with prompt tuning, we are able to capture the intent of the documents, giving us a more targeted prompt overall. - - - -```yaml -Provide a comprehensive yet concise summary of the given entity, incorporating its description and associated triples: - -Entity Info: -{entity_info} -Triples: -{triples_txt} - -Your summary should: -1.
Clearly define the entity's core concept or purpose -2. Highlight key relationships or attributes from the triples -3. Integrate any relevant information from the existing description -4. Maintain a neutral, factual tone -5. Be approximately 2-3 sentences long - -Ensure the summary is coherent, informative, and captures the essence of the entity within the context of the provided information. -``` - - - - -```yaml -Provide a comprehensive yet concise summary of the given entity, focusing on its significance in the field of scientific research, while incorporating its description and associated triples: - -Entity Info: -{entity_info} -Triples: -{triples_txt} - -Your summary should: -1. Clearly define the entity's core concept or purpose within computational biology, artificial intelligence, and medicine -2. Highlight key relationships or attributes from the triples that illustrate advancements in scientific understanding and reasoning -3. Integrate any relevant information from the existing description, particularly breakthroughs and methodologies -4. Maintain a neutral, factual tone -5. Be approximately 2-3 sentences long - -Ensure the summary is coherent, informative, and captures the essence of the entity within the context of the provided information, emphasizing its impact on the field. -``` - - - - -After prompt tuning, we see an increase in the number of communities; these communities also appear more focused and domain-specific, with clearer thematic boundaries. - -Prompt tuning produces: -- **More precise community separation:** GraphRAG alone produced a single `MicroRNA Research` community, while GraphRAG with prompt tuning produced communities around `C. elegans MicroRNA Research`, `LET-7 MicroRNA`, and `miRNA-184 and EDICT Syndrome`. -- **Enhanced domain focus:** Previously, we had a single community for `AI Researchers`, but with prompt tuning we create specialized communities such as `Hinton, Hopfield, and Deep Learning`, `Hochreiter and Schmidhuber`, and `Minsky and Papert's ANN Critique`. - -| Count | GraphRAG | GraphRAG with Prompt Tuning | -|-------------|----------|-----------------------------| -| Entities | 661 | 636 | -| Triples | 509 | 503 | -| Communities | 29 | 41 | - -Prompt tuning allows us to generate communities that better reflect the natural organization of the domain knowledge while maintaining more precise technical and thematic boundaries between related concepts. - -## Contextual Chunk Enrichment - -Contextual chunk enrichment is a technique that allows us to capture the semantic meaning of the entities and relationships in the knowledge graph. This is done by using a combination of the entity's textual description and its contextual embeddings. This enrichment process enhances the quality and depth of information in your knowledge graph by: - -1. Analyzing the surrounding context of each entity mention -2. Incorporating semantic information from related passages -3. Preserving important contextual nuances that might be lost in simple entity extraction - -You can learn more about contextual chunk enrichment [here](/cookbooks/contextual-enrichment). - - -### Entity Deduplication - -When creating a knowledge graph across multiple documents, entities are initially created at the document level. This means that the same real-world entity (e.g., "Albert Einstein" or "CRISPR") might appear multiple times if it's mentioned in different documents.
This duplication can lead to: - -- Redundant information in your knowledge graph -- Fragmented relationships across duplicate entities -- Increased storage and processing overhead -- Potentially inconsistent entity descriptions - -The `deduplicate-entities` endpoint addresses these issues by: -1. Identifying similar entities using name (exact match, other strategies coming soon) -2. Merging their properties and relationships -3. Maintaining the most comprehensive description -4. Removing the duplicate entries - - - -```bash -r2r deduplicate-entities --collection-id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09 --run - -# Example Response -[{'message': 'Deduplication task queued successfully.', 'task_id': 'd9dae1bb-5862-4a16-abaf-5297024df390'}] -``` - - - -```python -from r2r import R2RClient - -client = R2RClient("http://localhost:7272") -client.deduplicate_entities( - collection_id="122fdf6a-e116-546b-a8f6-e4cb2e2c0a09", - run_type="run" -) - -# Example Response -[{'message': 'Deduplication task queued successfully.', 'task_id': 'd9dae1bb-5862-4a16-abaf-5297024df390'}] -``` - - - -#### Monitoring Deduplication - -You can monitor the deduplication process in two ways: - -1. **Hatchet Dashboard**: Access the dashboard at http://localhost:7274 to view: - - Task status and progress - - Any errors or warnings - - Completion time estimates - -2. **API Endpoints**: Once deduplication is complete, verify the results using these endpoints with `entity_level = collection`: - - [Entities API](http://localhost:7272/v2/entities?collection_id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09&entity_level=collection) - - [Triples API](http://localhost:7272/v2/triples?collection_id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09&entity_level=collection) - -#### Best Practices - -When using entity deduplication: - -- Run deduplication after initial graph creation but before any enrichment steps -- Monitor the number of entities before and after to ensure expected reduction -- Review a sample of merged entities to verify accuracy -- For large collections, expect the process to take longer and plan accordingly diff --git a/docs/cookbooks/advanced-rag.mdx b/docs/cookbooks/advanced-rag.mdx deleted file mode 100644 index 04dc0c187..000000000 --- a/docs/cookbooks/advanced-rag.mdx +++ /dev/null @@ -1,297 +0,0 @@ ---- -title: 'Advanced RAG' -description: 'Learn how to build and use advanced RAG techniques with R2R' -icon: 'wand-magic-sparkles' ---- - - -# Advanced RAG Techniques - -R2R supports advanced Retrieval-Augmented Generation (RAG) techniques that can be easily configured at runtime. This flexibility allows you to experiment with different SoTA strategies and optimize your RAG pipeline for specific use cases. **This cookbook will cover toggling between vanilla RAG, [HyDE](https://arxiv.org/abs/2212.10496) and [RAG-Fusion](https://arxiv.org/abs/2402.03367).**. - - - -Advanced RAG techniques are still a beta feature in R2R. They are not currently supported in agentic workflows and there may be limitations in observability and analytics when implementing them. - - -Are we missing an important RAG technique? If so, then please let us know at founders@sciphi.ai. - - - -## Advanced RAG in R2R -R2R is designed from the ground up to make it easy to implement advanced RAG techniques. Its modular architecture, based on orchestrated pipes and pipelines, allows for easy customization and extension. 
A generic implementation diagram of the system is shown below: - -```mermaid -graph TD - A[User Query] --> B[QueryTransformPipe] - B --> C[MultiSearchPipe] - C --> G[VectorSearchPipe] - F[Postgres DB] --> G - C --> D[GraphSearchPipe] - H[GraphDB DB] --> D - - D --> E[RAG Generation] - G --> E[RAG Generation] - A --> E - -``` - -## Supported Advanced RAG Techniques - -R2R currently supports two advanced RAG techniques: - -1. **HyDE (Hypothetical Document Embeddings)**: Enhances retrieval by generating and embedding hypothetical documents based on the query. -2. **RAG-Fusion**: Improves retrieval quality by combining results from multiple search iterations. - -## Using Advanced RAG Techniques - -You can specify which advanced RAG technique to use by setting the `search_strategy` parameter in your vector search settings. Below is a comprehensive overview of techniques supported by R2R. - -### HyDE - -#### What is HyDE? - -HyDE is an innovative approach that supercharges dense retrieval, especially in zero-shot scenarios. Here's how it works: - -1. **Query Expansion**: HyDE uses a Language Model to generate hypothetical answers or documents based on the user's query. -2. **Enhanced Embedding**: These hypothetical documents are embedded, creating a richer semantic search space. -3. **Similarity Search**: The embeddings are used to find the most relevant actual documents in your database. -4. **Informed Generation**: The retrieved documents and original query are used to generate the final response. - -#### Implementation Diagram - - - -The diagram which follows below illustrates the HyDE flow which fits neatly into the schema of our diagram above (note, the GraphRAG workflow is omitted for brevity): - -```mermaid - -graph TD - A[User Query] --> B[QueryTransformPipe] - B -->|Generate Hypothetical Documents| C[MultiSearchPipe] - C --> D[VectorSearchPipe] - D --> E[RAG Generation] - A --> E - F[Document DB] --> D - - subgraph HyDE Process - B --> G[Hypothetical Doc 1] - B --> H[Hypothetical Doc 2] - B --> I[Hypothetical Doc n] - G --> J[Embed] - H --> J - I --> J - J --> C - end - - subgraph Vector Search - D --> K[Similarity Search] - K --> L[Rank Results] - L --> E - end - - C --> |Multiple Searches| D - K --> |Retrieved Documents| L -``` - -#### Using HyDE in R2R - - - - - - -```python -from r2r import R2RClient - -client = R2RClient() - -hyde_response = client.rag( - "What are the main themes in Shakespeare's plays?", - vector_search_settings={ - "search_strategy": "hyde", - "search_limit": 10 - } -) - -print('hyde_response = ', hyde_response) -``` - - - - -```bash -r2r rag --query="who was aristotle" --use-hybrid-search --stream --search-strategy=hyde -``` - - - -```bash Sample Output -'results': { - 'completion': ... - 'search_results': { - 'vector_search_results': [ - { - ... - 'score': 0.7715058326721191, - 'text': '## Paragraph from the Chapter\n\nThe Fundamental Theorem of Calculus states that if a function is continuous on a closed interval [a, b], then the function has an antiderivative in the interval [a, b]. This theorem is a cornerstone of calculus and has far-reaching consequences in various fields, including physics, engineering, and economics. The theorem can be proved through the use of Riemann sums and the limit process, which provides a rigorous foundation for understanding the relationship between integration and differentiation. 
The theorem highlights the deep connection between these two branches of mathematics, offering a unified framework for analyzing functions and their derivatives.' - 'metadata': { - 'associated_query': 'The fundamental theorem of calculus states that if a function is continuous on the interval [a, b] and F is an antiderivative of f on [a, b], then the integral of f from a to b is equal to F(b) - F(a). This theorem links the concept of differentiation with that of integration, providing a way to evaluate definite integrals without directly computing the limit of a sum.', - ... - } - }, - ], - ... - } -} -``` -### RAG-Fusion - -#### What is RAG-Fusion? - -RAG-Fusion is an advanced technique that combines Retrieval-Augmented Generation (RAG) with Reciprocal Rank Fusion (RRF) to improve the quality and relevance of retrieved information. Here's how it works: - -1. **Query Expansion**: The original query is used to generate multiple related queries, providing different perspectives on the user's question. -2. **Multiple Retrievals**: Each generated query is used to retrieve relevant documents from the database. -3. **Reciprocal Rank Fusion**: The retrieved documents are re-ranked using the RRF algorithm, which combines the rankings from multiple retrieval attempts. -4. **Enhanced RAG**: The re-ranked documents, along with the original and generated queries, are used to generate the final response. - -This approach helps to capture a broader context and potentially more relevant information compared to traditional RAG. - -#### Implementation Diagram - -Here's a diagram illustrating the RAG-Fusion workflow (again, we omit the GraphRAG pipeline for brevity): - -```mermaid -graph TD - A[User Query] --> B[QueryTransformPipe] - B -->|Generate Multiple Queries| C[MultiSearchPipe] - C --> D[VectorSearchPipe] - D --> E[RRF Reranking] - E --> F[RAG Generation] - A --> F - G[Document DB] --> D - - subgraph RAG-Fusion Process - B --> H[Generated Query 1] - B --> I[Generated Query 2] - B --> J[Generated Query n] - H --> C - I --> C - J --> C - end - - subgraph Vector Search - D --> K[Search Results 1] - D --> L[Search Results 2] - D --> M[Search Results n] - K --> E - L --> E - M --> E - end - - E --> |Re-ranked Documents| F -``` - -#### Using RAG-Fusion in R2R - - - - - -```python -from r2r import R2RClient - -client = R2RClient() - -rag_fusion_response = client.rag( - "Explain the theory of relativity", - vector_search_settings={ - "search_strategy": "rag_fusion", - "search_limit": 20 - } -) - -print('rag_fusion_response = ', rag_fusion_response) -``` - - - -```bash -r2r rag --query="Explain the theory of relativity" --use-hybrid-search --stream --search-strategy=rag_fusion -``` - - - - - -```bash Sample Output -'results': { - 'completion': ... - 'search_results': { - 'vector_search_results': [ - { - ... - 'score': 0.04767399003253049, - 'text': '18. The theory of relativity, proposed by Albert Einstein in 1905, is a fundamental theory in modern physics that describes the relationships between space, time, and matter. The theory is based on two postulates, which are the principle of relativity and the invariant speed of light. The principle of relativity states that all inertial reference frames are equivalent, while the invariant speed of light refers to the constant speed of light in vacuum, independent of the motion of the emitting body.\n\n19. Through the use of space-time diagrams, we can graphically represent events and their relationships in space and time. 
By plotting events on a Minkowski diagram, which is a four-dimensional representation of space and time, we can visualize time dilation and length contraction, two key effects of the theory of relativity. The hyperbola of light in the Minkowski diagram illustrates the invariant speed of light, providing a clear depiction of the geometry of space and time.', - 'metadata': { - 'associated_queries': ['What is the theory of relativity?', "What are the key principles of Einstein's theory of relativity?", 'How does the theory of relativity impact our understanding of space and time?'], - ... - } - }, - ], - ... - } -} -``` - - - -### Combining with Other Settings - -You can readily combine these advanced techniques with other search and RAG settings: - -```python -custom_rag_response = client.rag( - "Describe the impact of climate change on biodiversity", - vector_search_settings={ - "search_strategy": "hyde", - "search_limit": 15, - "use_hybrid_search": True - }, - rag_generation_config={ - "model": "anthropic/claude-3-opus-20240229", - "temperature": 0.7 - } -) -``` - - -## Customization and Server-Side Defaults - -While R2R allows for runtime configuration of these advanced techniques, it's worth noting that server-side defaults can also be modified for consistent behavior across your application. This includes the ability to update and customize prompts used for techniques like HyDE and RAG-Fusion. - -- For general configuration options, refer to the R2R [configuration documentation](/documentation/configuration/introduction). -- To learn about customizing prompts, including those used for HyDE and RAG-Fusion, see the [prompt configuration documentation](/documentation/configuration/prompts). - -Prompts play a crucial role in shaping the behavior of these advanced RAG techniques. By modifying the HyDE and RAG-Fusion prompts, you can fine-tune the query expansion and retrieval processes to better suit your specific use case or domain. - - -Currently, these advanced techniques use a hard-coded multi-search configuration in the `MultiSearchPipe`: - -```python -class MultiSearchPipe(AsyncPipe): - class PipeConfig(AsyncPipe.PipeConfig): - name: str = "multi_search_pipe" - use_rrf: bool = False - rrf_k: int = 60 # RRF constant - num_queries: int = 3 - expansion_factor: int = 3 # Factor to expand results before RRF -``` - -This configuration will be made user-configurable in the near future, allowing for even greater flexibility in customizing these advanced RAG techniques. - - -## Conclusion - -By leveraging these advanced RAG techniques and customizing their underlying prompts, you can significantly enhance the quality and relevance of your retrieval and generation processes. Experiment with different strategies, settings, and prompt variations to find the optimal configuration for your specific use case. The flexibility of R2R allows you to iteratively improve your system's performance and adapt to changing requirements. diff --git a/docs/cookbooks/agent.mdx b/docs/cookbooks/agent.mdx deleted file mode 100644 index d9000fbe3..000000000 --- a/docs/cookbooks/agent.mdx +++ /dev/null @@ -1,179 +0,0 @@ ---- -title: 'Agents' -description: 'Learn how to implement and use agents with R2R' -icon: 'robot' ---- - -This cookbook demonstrates how to use the agentic capabilities which ship by default in R2R. The R2R agent is an intelligent system that can formulate its own questions, search for information, and provide informed responses based on the retrieved context. It can be customized on the fly. 
- - Agents in the R2R framework are still in beta. Please let us know what you like/dislike and what features you would like to see added. - -## Understanding R2R's RAG Agent - -R2R's RAG agent is designed to provide powerful, context-aware responses by combining large language models with a search capability over your ingested documents. When you initialize an R2R application, it automatically creates a RAG assistant that's ready to use. -R2R plans to extend its agent functionality to mirror core features supported by OpenAI and more, including: - -- Multiple tool support (e.g., code interpreter, file search) -- Persistent conversation threads -- Complete end to end observability of agent interactions - - -R2R also provides support for local RAG capabilities, allowing you to create AI agents that can access and reason over your local document store, entirely offline. - -The RAG agent is also available for use through the R2R API, specifically via the `agent` endpoint. - -## Using the RAG Agent - -Now, let's use the RAG agent to answer questions: - -```python -def rag_agent_example(): - # Prepare messages for the chat - messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - { - "role": "user", - "content": "Who is the greatest philospher of all time?", - }, - { - "role": "assistant", - "content": "Aristotle is widely considered the greatest philospher of all time.", - }, - { - "role": "user", - "content": "Can you tell me more about him?", - }, - ] - - # Use the RAG assistant via the agent endpoint - response = client.agent( - messages=messages, - vector_search_settings={"search_limit": 5, "filters": {}}, - kg_search_settings={"use_kg_search": False}, - rag_generation_config={"max_tokens": 300} - ) - - print("RAG Assistant Response:") - # Note, response includes the full conversation, including steps taken by the assistant to produce the final result. - print(response) - -# Run the example -rag_agent_example() - -# RAG Assistant Response: -# { -# 'results': [ -# {'role': 'system', 'content': '## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.', 'name': None, 'function_call': None, 'tool_calls': None}, -# {'role': 'system', 'content': 'You are a helpful assistant.', 'name': None, 'function_call': None, 'tool_calls': None}, -# {'role': 'user', 'content': 'Who is the greatest philospher of all time?', 'name': None, 'function_call': None, 'tool_calls': None}, -# {'role': 'assistant', 'content': 'Aristotle is widely considered the greatest philospher of all time.', 'name': None, 'function_call': None, 'tool_calls': None}, -# {'role': 'user', 'content': 'Can you tell me more about him?', 'name': None, 'function_call': None, 'tool_calls': None}, -# {'role': 'assistant', 'content': None, 'name': None, 'function_call': {'name': 'search', 'arguments': '{"query":"Aristotle biography"}'}, 'tool_calls': None}, -# {'role': 'function', 'content': "1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. 
His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n", 'name': 'search', 'function_call': None, 'tool_calls': None}, -# {'role': 'assistant', 'content': "Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. 
His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.", 'name': None, 'function_call': None, 'tool_calls': None} -# ] -# } - -``` - -In this example, the agent might formulate its own questions to gather more information before providing a response. For instance, it could ask the search tool about Aristotle's scientific works or his methods of inquiry. - -## Streaming Responses - -To see the agent's thought process in real-time, you can use streaming responses: - -```python -def streaming_rag_agent_example(): - messages = [ - {"role": "system", "content": "You are a helpful agent with access to a large knowledge base. You can ask questions to gather more information if needed."}, - {"role": "user", "content": "What were Aristotle's main philosophical ideas?"}, - ] - - streaming_response = client.agent( - messages=messages, - vector_search_settings={"search_limit": 5, "filters": {}}, - kg_search_settings={"use_kg_search": False}, - rag_generation_config={"max_tokens": 300, "stream": True} - ) - - print("Streaming RAG Assistant Response:") - for chunk in streaming_response: - print(chunk, end="", flush=True) - print() # New line after streaming response - -streaming_rag_agent_example() - -# Streaming RAG Assistant Response: - -# search{"query":"Aristotle's main philosophical ideas"}1. Politics -# Main article: Politics (Aristotle) -# 2. Ethics -# Main article: Aristotelian ethics -# Aristotle considered ethics to be a practical rather than theoretical study, i.e., one aimed at becoming good and doing good rather than knowing for its own sake. He wrote several treatises on ethics, most notably including the Nicomachean Ethics.[117] -# 3. Metaphysics -# Main article: Metaphysics (Aristotle) -# The word "metaphysics" appears to have been coined by the first century AD editor who assembled various small selections of Aristotle's works to the treatise we know by the name Metaphysics.[34] Aristotle called it "first philosophy", and distinguished it from mathematics and natural science (physics) as the contemplative (theoretikē) philosophy which is "theological" and studies the divine. He wrote in his Metaphysics (1026a16): -# 4. Practical philosophy -# Aristotle's practical philosophy covers areas such as ethics, politics, economics, and rhetoric.[40] -# 5. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science. -# Aristotle, an ancient Greek philosopher, made significant contributions across various fields of philosophy. Here are some of his main philosophical ideas: -# -# 1. **Metaphysics**: Aristotle's metaphysics, often referred to as "first philosophy," deals with the nature of reality, existence, and the fundamental nature of being. 
He introduced the concept of substance and essence, and his work laid the groundwork for later metaphysical thought [Metaphysics (Aristotle)]. -# -# 2. **Ethics**: Aristotle's ethical philosophy is primarily outlined in his work "Nicomachean Ethics." He emphasized the concept of virtue ethics, which focuses on the development of good character traits (virtues) and living a life of moral excellence. He believed that the ultimate goal of human life is eudaimonia, often translated as "happiness" or "flourishing," achieved through virtuous living [Aristotelian ethics]. -# -# 3. **Politics**: In his work "Politics," Aristotle explored the nature of human communities and the role of the state. He believed that humans are naturally political animals and that the state exists to promote the good life. He analyzed different forms of government and advocated for a constitutional government that balances the interests of different social classes [Politics (Aristotle)]. -# -# 4. **Practical Philosophy**: Aristotle's practical philosophy encompasses ethics, politics, economics, and rhetoric. He believed that philosophy should be practical and aimed at improving human life and society [Practical philosophy]. -# -# 5. **Natural Sciences**: Aristotle made substantial contributions to the natural sciences, including biology, physics, and astronomy. He conducted empirical observations and classified various forms of life, laying the foundation for the scientific method [Aristotle's contributions to natural sciences]. - -# Aristotle's extensive body of work has had a profound and lasting impact on Western philosophy and science, influencing countless thinkers and shaping the development of various academic disciplines. - -``` - -This will produce a streaming response, showing the agent's thought process, including its search queries and the gradual construction of its response. - -## Customizing the RAG Agent - This section is still under development, please proceed carefully when customizing agents. -You can customize various aspects of the RAG agent's behavior: - -```python -custom_response = client.agent( - messages=[ - {"role": "system", "content": "You are an expert on ancient Greek philosophy. Ask clarifying questions if needed."}, - {"role": "user", "content": "Compare Aristotle's and Plato's views on the nature of reality."}, - ], - vector_search_settings={"search_limit": 25, "use_hybrid_search": True, "filters": {"category": {"$eq": "philosophy"}}}, - kg_search_settings={"use_kg_search": False}, - rag_generation_config={ - "max_tokens": 500, - "temperature": 0.7, - "model": "openai/gpt-4o" # Assuming you have access to GPT-4 - } -) - -print("Customized RAG Agent Response:") -print(custom_response) -``` - -This example demonstrates how to: -- Set custom search filters -- Enable hybrid search (combining vector and keyword search) -- Adjust the number of search results -- Customize the generation config -- Use a specific model for the response - -... - -## Conclusion - -The R2R RAG assistant is a powerful tool that combines large language models with advanced search capabilities. By leveraging your ingested documents and the flexible `agent` endpoint, you can create dynamic, context-aware conversational experiences. - -Key takeaways: -1. R2R automatically creates a RAG assistant when initializing the application. -2. The `agent` endpoint provides easy access to the RAG assistant's capabilities. -3. 
You can customize various aspects of the assistant's behavior, including search settings and generation parameters. -4. Streaming responses allow for real-time interaction and visibility into the assistant's thought process. - -Experiment with different settings and configurations to find the optimal setup for your specific use case. Remember to keep your ingested documents up-to-date to ensure the assistant has access to the most relevant and recent information. diff --git a/docs/cookbooks/application.mdx b/docs/cookbooks/application.mdx deleted file mode 100644 index ed0de2d49..000000000 --- a/docs/cookbooks/application.mdx +++ /dev/null @@ -1,125 +0,0 @@ ---- -title: 'Application' -description: 'Learn how to set up and use the R2R Application for managing your instance.' -icon: 'display' ---- - -R2R offers an [open-source React+Next.js application](https://github.com/SciPhi-AI/R2R-Application) designed to give developers an administrative portal for their R2R deployment, and users an application to communicate with out of the box. - -## Setup - -### Install PNPM - -PNPM is a fast, disk space-efficient package manager. To install PNPM, visit the [official PNPM installation page](https://pnpm.io/installation) or follow these instructions: - - - - -For Unix-based systems (Linux, macOS): - -```bash -curl -fsSL https://get.pnpm.io/install.sh | sh - -``` - -For Windows: - -```powershell -iwr https://get.pnpm.io/install.ps1 -useb | iex -``` - -After installation, you may need to add PNPM to your system's PATH. - - - - -### Installing and Running the R2R Dashboard - -If you're running R2R with the Docker, you already have the R2R application running! Just navigate to [http://localhost:7273](http://localhost:7273). - -If you're running R2R outside of Docker, run the following commands to install the R2R Dashboard. - -1. Clone the project repository and navigate to the project directory: - -```bash -git clone git@github.com:SciPhi-AI/R2R-Application.git -cd R2R-Application -``` - -2. Install the project dependencies: - -```bash -pnpm install -``` - -3. Build and start the application for production: - -```bash -pnpm build -pnpm start -``` - -The dashboard will be available at [http://localhost:3000](http://localhost:3000). - -## Features - -### Login - -To interact with R2R with the dashboard, you must first login. If it's your first time logging in, log in with the default credentials shown. - -By default, an R2R instance is hosted on port 7272. The login page will include this URL by default, but be sure to update the URL if your R2R instance is deployed elsewhere. For information about deploying a local R2R application server, see the [quickstart](/documentation/quickstart). - -![R2R Dashboard Overview](/images/login.png) - -### Documents - -The documents page provides an overview of uploaded documents and their metadata. You can upload new documents and update, download, or delete existing ones. Additionally, you can view information about each document, including the documents' chunks and previews of PDFs. - -![Documents Page](/images/oss_dashboard_documents.png) - -### Collections - -Collections allow users to create and share sets of documents. The collections page provides a place to manage your existing collections or create new collections. - -![Collections Page](/images/oss_collections_page.png) - -### Chat - -In the chat page, you can stream RAG responses with different models and configurable settings. You can interact with both the RAG Agent and RAG endpoints here. 
- -![Chat Interface](/images/chat.png) - -### Users - -Manage your users and gain insight into their interactions. - -![Users Page](/images/users.png) - -### Logs - -The Logs page enables tracking of user queries, search results, and LLM responses. - -![Logs Page](/images/logs.png) - -### Settings - -The settings page allows you to view your R2R deployment's configuration and edit its prompts. - -![Settings Configuration Page](/images/settings_config.png) -![Settings Prompts Page](/images/settings_prompts.png) - -## Development - -To develop the R2R dashboard: - -1. Start the development server: - -```bash -pnpm dev -``` - -2. Run pre-commit checks (optional but recommended): - -```bash -pnpm format -pnpm lint -``` diff --git a/docs/cookbooks/collections.mdx b/docs/cookbooks/collections.mdx deleted file mode 100644 index 8925c1eea..000000000 --- a/docs/cookbooks/collections.mdx +++ /dev/null @@ -1,263 +0,0 @@ ---- -title: 'Collections' -description: 'A comprehensive guide to creating collections in R2R' -icon: 'database' ---- - -## Introduction - -A collection in R2R is a logical grouping of users and documents that allows for efficient access control and organization. Collections enable you to manage permissions and access to documents at a group level, rather than individually. - -R2R provides robust document collection management, allowing developers to implement efficient access control and organization of users and documents. This cookbook will guide you through the collection capabilities in R2R. - -For user authentication, please refer to the [User Auth Cookbook](/cookbooks/user-auth). - - -Collection permissioning in R2R is still under development, and as a result the API is likely to continue evolving in future releases. - - - -```mermaid -graph TD - A[User] --> B(Authentication) - B --> C{Authenticated?} - C -->|Yes| D[Authenticated User] - C -->|No| E[Guest User] - D --> F[Collection Management] - D --> G[Document Management] - D --> H[User Profile Management] - G --> I[CRUD Operations] - G --> J[Search & RAG] - D --> K[Logout] - L[Admin] --> M[Superuser Authentication] - M --> N[Superuser Capabilities] - N --> O[User Management] - N --> P[System-wide Document Access] - N --> Q[Analytics & Observability] - N --> R[Configuration Management] -``` -_A diagram showing user and collection management across R2R_ - -## Basic Usage - - -Collections currently follow a flat hierarchy wherein superusers are responsible for management operations. This functionality will expand as development on R2R continues.
- - -### Creating a Collection - -Let's start by creating a new collection: - -```python -from r2r import R2RClient - -client = R2RClient("http://localhost:7272") # Replace with your R2R deployment URL - -# Assuming you're logged in as an admin or a user with appropriate permissions -# For testing, the default R2R implementation will grant superuser privileges to anonymous API calls -collection_result = client.create_collection("Marketing Team", "Collection for marketing department") - -print(f"Collection creation result: {collection_result}") -# {'results': {'collection_id': '123e4567-e89b-12d3-a456-426614174000', 'name': 'Marketing Team', 'description': 'Collection for marketing department', 'created_at': '2024-07-16T22:53:47.524794Z', 'updated_at': '2024-07-16T22:53:47.524794Z'}} -``` - -### Getting Collection Details - -To retrieve details about a specific collection: - -```python -collection_id = '123e4567-e89b-12d3-a456-426614174000' # Use the collection_id from the creation result -collection_details = client.get_collection(collection_id) - -print(f"Collection details: {collection_details}") -# {'results': {'collection_id': '123e4567-e89b-12d3-a456-426614174000', 'name': 'Marketing Team', 'description': 'Collection for marketing department', 'created_at': '2024-07-16T22:53:47.524794Z', 'updated_at': '2024-07-16T22:53:47.524794Z'}} -``` - -### Updating a Collection - -You can update a collection's name or description: - -```python -update_result = client.update_collection(collection_id, name="Updated Marketing Team", description="New description for marketing team") - -print(f"Collection update result: {update_result}") -# {'results': {'collection_id': '123e4567-e89b-12d3-a456-426614174000', 'name': 'Updated Marketing Team', 'description': 'New description for marketing team', 'created_at': '2024-07-16T22:53:47.524794Z', 'updated_at': '2024-07-16T23:15:30.123456Z'}} -``` - -### Listing Collections - -To get a list of all collections: - -```python -collections_list = client.list_collections() - -print(f"Collections list: {collections_list}") -# {'results': [{'collection_id': '123e4567-e89b-12d3-a456-426614174000', 'name': 'Updated Marketing Team', 'description': 'New description for marketing team', 'created_at': '2024-07-16T22:53:47.524794Z', 'updated_at': '2024-07-16T23:15:30.123456Z'}, ...]} -``` - -## User Management in Collections - -### Adding a User to a Collection - -To add a user to a collection, you need both the user's ID and the collection's ID: - -```python -user_id = '456e789f-g01h-34i5-j678-901234567890' # This should be a valid user ID -add_user_result = client.add_user_to_collection(user_id, collection_id) - -print(f"Add user to collection result: {add_user_result}") -# {'results': {'message': 'User successfully added to the collection'}} -``` - -### Removing a User from a Collection - -Similarly, to remove a user from a collection: - -```python -remove_user_result = client.remove_user_from_collection(user_id, collection_id) - -print(f"Remove user from collection result: {remove_user_result}") -# {'results': None} -``` - -### Listing Users in a Collection - -To get a list of all users in a specific collection: - -```python -users_in_collection = client.get_users_in_collection(collection_id) - -print(f"Users in collection: {users_in_collection}") -# {'results': [{'user_id': '456e789f-g01h-34i5-j678-901234567890', 'email': 'user@example.com', 'name': 'John Doe', ...}, ...]} -``` - -### Getting Collections for a User - -To get all collections that a user is a member of:
-```python -user_collections = client.user_collections(user_id) - -print(f"User's collections: {user_collections}") -# {'results': [{'collection_id': '123e4567-e89b-12d3-a456-426614174000', 'name': 'Updated Marketing Team', ...}, ...]} -``` - -## Document Management in Collections - -### Assigning a Document to a Collection - -To assign a document to a collection: - -```python -document_id = '789g012j-k34l-56m7-n890-123456789012' # This should be a valid document ID -assign_doc_result = client.assign_document_to_collection(document_id, collection_id) - -print(f"Assign document to collection result: {assign_doc_result}") -# {'results': {'message': 'Document successfully assigned to the collection'}} -``` - -### Removing a Document from a Collection - -To remove a document from a collection: - -```python -remove_doc_result = client.remove_document_from_collection(document_id, collection_id) - -print(f"Remove document from collection result: {remove_doc_result}") -# {'results': {'message': 'Document successfully removed from the collection'}} -``` - -### Listing Documents in a Collection - -To get a list of all documents in a specific collection: - -```python -docs_in_collection = client.documents_in_collection(collection_id) - -print(f"Documents in collection: {docs_in_collection}") -# {'results': [{'document_id': '789g012j-k34l-56m7-n890-123456789012', 'title': 'Marketing Strategy 2024', ...}, ...]} -``` - -### Getting Collections for a Document - -To get all collections that a document is assigned to: - -```python -document_collections = client.document_collections(document_id) - -print(f"Document's collections: {document_collections}") -# {'results': [{'collection_id': '123e4567-e89b-12d3-a456-426614174000', 'name': 'Updated Marketing Team', ...}, ...]} -``` - -## Advanced Collection Management - -### Collection Overview - -To get an overview of collection, including user and document counts: - -```python -collections_overview = client.collections_overview() - -print(f"Collections overview: {collections_overview}") -# {'results': [{'collection_id': '123e4567-e89b-12d3-a456-426614174000', 'name': 'Updated Marketing Team', 'description': 'New description for marketing team', 'user_count': 5, 'document_count': 10, ...}, ...]} -``` - -### Deleting a Collection - -To delete a collection: - -```python -delete_result = client.delete_collection(collection_id) - -print(f"Delete collection result: {delete_result}") -# {'results': {'message': 'Collection successfully deleted'}} -``` - -## Pagination and Filtering - -Many of the collection-related methods support pagination and filtering. Here are some examples: - -```python -# List collections with pagination -paginated_collection = client.list_collections(offset=10, limit=20) - -# Get users in a collection with pagination -paginated_users = client.get_users_in_collection(collection_id, offset=5, limit=10) - -# Get documents in a collection with pagination -paginated_docs = client.documents_in_collection(collection_id, offset=0, limit=50) - -# Get collections overview with specific collection IDs -specific_collections_overview = client.collections_overview(collection_ids=['id1', 'id2', 'id3']) -``` - -## Security Considerations - -When implementing collection permissions, consider the following security best practices: - -1. **Least Privilege Principle**: Assign the minimum necessary permissions to users and collections. -2. **Regular Audits**: Periodically review collection memberships and document assignments. -3. 
**Access Control**: Ensure that only authorized users (e.g., admins) can perform collection management operations. -4. **Logging and Monitoring**: Implement comprehensive logging for all collection-related actions. - -## Customizing Collection Permissions - -While R2R's current collection system follows a flat hierarchy, you can build more complex permission structures on top of it: - -1. **Custom Roles**: Implement application-level roles within collections (e.g., collection admin, editor, viewer). -2. **Hierarchical Collections**: Create a hierarchy by establishing parent-child relationships between collections in your application logic. -3. **Permission Inheritance**: Implement rules for permission inheritance based on collection memberships. - -## Troubleshooting - -Here are some common issues and their solutions: - -1. **Unable to Create/Modify Collections**: Ensure the user has superuser privileges. -2. **User Not Seeing Collection Content**: Verify that the user is correctly added to the collection and that documents are properly assigned. -3. **Performance Issues with Large Collections**: Use pagination when retrieving users or documents in large collections. - -## Conclusion - -R2R's collection permissioning system provides a foundation for implementing sophisticated access control in your applications. As the feature set evolves, more advanced capabilities will become available. Stay tuned to the R2R documentation for updates and new features related to collection permissions. - -For user authentication and individual user management, refer to the [User Auth Cookbook](/cookbooks/user-auth). For more advanced use cases or custom implementations, consult the R2R documentation or reach out to the community for support. diff --git a/docs/cookbooks/contextual-enrichment.mdx b/docs/cookbooks/contextual-enrichment.mdx deleted file mode 100644 index dc28f1cf0..000000000 --- a/docs/cookbooks/contextual-enrichment.mdx +++ /dev/null @@ -1,151 +0,0 @@ ---- -title: 'Contextual Chunk Enrichment' -description: 'Enhance your RAG system chunks with rich contextual information' -icon: 'puzzle-piece' ---- - -# Understanding Chunk Enrichment in RAG Systems - -In modern Retrieval-Augmented Generation (RAG) systems, documents are systematically broken down into smaller, manageable pieces called chunks. While chunking is essential for efficient vector search operations, these individual chunks sometimes lack the broader context needed for comprehensive question answering or analysis tasks. - -## The Challenge of Context Loss - -Let's examine a real-world example using Lyft's 2021 annual report (Form 10-K) from their [public filing](https://github.com/SciPhi-AI/R2R/blob/main/py/core/examples/data/lyft_2021.pdf). - -During ingestion, this 200+ page document is broken into 1,223 distinct chunks. Consider this isolated chunk: - -```plaintext -storing unrented and returned vehicles. These impacts to the demand for and operations of the different rental programs have and may continue to adversely affect our business, financial condition and results of operation. -``` - -Reading this chunk in isolation raises several questions: -- What specific impacts are being discussed? -- Which rental programs are affected? -- What's the broader context of these business challenges? - -This is where contextual enrichment becomes invaluable. 
- -## Introducing Contextual Enrichment - -Contextual enrichment is an advanced technique that enhances chunks with relevant information from surrounding or semantically related content. Think of it as giving each chunk its own "memory" of related information. - -### Enabling Enrichment - -To activate this feature, configure your `r2r.toml` file with the following settings: - -```toml -[ingestion.chunk_enrichment_settings] - enable_chunk_enrichment = true # disabled by default - strategies = ["semantic", "neighborhood"] - forward_chunks = 3 # Look ahead 3 chunks - backward_chunks = 3 # Look behind 3 chunks - semantic_neighbors = 10 # Find 10 semantically similar chunks - semantic_similarity_threshold = 0.7 # Minimum similarity score - generation_config = { model = "openai/gpt-4o-mini" } -``` - -## Enrichment Strategies Explained - -R2R implements two sophisticated strategies for chunk enrichment: - -### 1. Neighborhood Strategy -This approach looks at the document's natural flow by examining chunks that come before and after the target chunk: -- **Forward Looking**: Captures upcoming context (configurable, default: 3 chunks) -- **Backward Looking**: Incorporates previous context (configurable, default: 3 chunks) -- **Use Case**: Particularly effective for narrative documents where context flows linearly - -### 2. Semantic Strategy -This method uses advanced embedding similarity to find related content throughout the document: -- **Vector Similarity**: Identifies chunks with similar meaning regardless of location -- **Configurable Neighbors**: Customizable number of similar chunks to consider -- **Similarity Threshold**: Set minimum similarity scores to ensure relevance -- **Use Case**: Excellent for documents with themes repeated across different sections - -## The Enrichment Process - -When enriching chunks, R2R uses a carefully crafted prompt to guide the LLM: - -```plaintext -## Task: - -Enrich and refine the given chunk of text using information from the provided context chunks. The goal is to make the chunk more precise and self-contained. - -## Context Chunks: -{context_chunks} - -## Chunk to Enrich: -{chunk} - -## Instructions: -1. Rewrite the chunk in third person. -2. Replace all common nouns with appropriate proper nouns. -3. Use information from the context chunks to enhance clarity. -4. Ensure the enriched chunk remains independent and self-contained. -5. Maintain original scope without bleeding information. -6. Focus on precision and informativeness. -7. Preserve original meaning while improving clarity. -8. Output only the enriched chunk. - -## Enriched Chunk: -``` - -## Implementation and Results - -To process your documents with enrichment: - -```bash -r2r ingest-files --file_paths path/to/lyft_2021.pdf -``` - -### Viewing Enriched Results - -Access your enriched chunks through the API: -``` -http://localhost:7272/v2/document_chunks/{document_id} -``` - -Let's compare the before and after of our example chunk: - -**Before Enrichment:** -```plaintext -storing unrented and returned vehicles. These impacts to the demand for and operations of the different rental programs have and may continue to adversely affect our business, financial condition and results of operation. -``` - -**After Enrichment:** -```plaintext -The impacts of the COVID-19 pandemic on the demand for and operations of the various vehicle rental programs, including Lyft Rentals and the Express Drive program, have resulted in challenges regarding the storage of unrented and returned vehicles. 
These adverse conditions are anticipated to continue affecting Lyft's overall business performance, financial condition, and operational results. -``` - -Notice how the enriched version: -- Specifies the cause (COVID-19 pandemic) -- Names specific programs (Lyft Rentals, Express Drive) -- Provides clearer context about the business impact -- Maintains professional, third-person tone - -## Metadata and Storage - -The system maintains both enriched and original versions: - -```json -{ - "results": [ - { - "text": "enriched_version", - "metadata": { - "original_text": "original_version", - "chunk_enrichment_status": "success", - // ... additional metadata ... - } - } - ] -} -``` - -This dual storage ensures transparency and allows for version comparison when needed. - -## Best Practices - -1. **Tune Your Parameters**: Adjust `forward_chunks`, `backward_chunks`, and `semantic_neighbors` based on your document structure -2. **Monitor Enrichment Quality**: Regularly review enriched chunks to ensure they maintain accuracy -3. **Consider Document Type**: Different documents may benefit from different enrichment strategies -4. **Balance Context Size**: More context isn't always better - find the sweet spot for your use case diff --git a/docs/cookbooks/graphrag.mdx b/docs/cookbooks/graphrag.mdx deleted file mode 100644 index 6ebc6a14c..000000000 --- a/docs/cookbooks/graphrag.mdx +++ /dev/null @@ -1,503 +0,0 @@ ---- -title: 'GraphRAG' -description: 'Learn how to build and use GraphRAG with R2R' -icon: 'diagram-project' ---- - -## Introduction - -GraphRAG is a powerful feature of R2R that allows you to perform graph-based search and retrieval. This guide will walk you through the process of setting it up and running your first queries. - - - - - - - - -Note that graph construction may take long for local LLMs, we recommend using cloud LLMs for faster results. - - - -## Start server - -We provide three configurations for R2R: Light, Light with Local LLMs, and Full with Docker+Hatchet. If you want to get started quickly, we recommend using R2R Light. If you want to run large graph workloads, we recommend using R2R Full with Docker+Hatchet. - - - -```bash -r2r serve -``` - - -``` toml -[app] -# app settings are global available like `r2r_config.agent.app` -# project_name = "r2r_default" # optional, can also set with `R2R_PROJECT_NAME` env var - -[agent] -system_instruction_name = "rag_agent" -tool_names = ["search"] - -[auth] -provider = "r2r" -access_token_lifetime_in_minutes = 60 -refresh_token_lifetime_in_days = 7 -require_authentication = false -require_email_verification = false -default_admin_email = "admin@example.com" -default_admin_password = "change_me_immediately" - - -[completion] -provider = "litellm" -concurrent_request_limit = 256 - - [completion.generation_config] - model = "openai/gpt-4o" - temperature = 0.1 - top_p = 1 - max_tokens_to_sample = 1_024 - stream = false - add_generation_kwargs = { } - -[crypto] -provider = "bcrypt" - -[database] -provider = "postgres" -default_collection_name = "Default" -default_collection_description = "Your default collection." 
-enable_fts = true # whether or not to enable full-text search, e.g `hybrid search` - -[embedding] -provider = "litellm" -base_model = "openai/text-embedding-3-small" -base_dimension = 512 -batch_size = 128 -add_title_as_prefix = false -rerank_model = "None" -concurrent_request_limit = 256 -quantization_settings = { quantization_type = "FP32" } - -[file] -provider = "postgres" - -[ingestion] -provider = "r2r" -chunking_strategy = "recursive" -chunk_size = 1_024 -chunk_overlap = 512 -excluded_parsers = ["mp4"] - - [ingestion.chunk_enrichment_settings] - enable_chunk_enrichment = false # disabled by default - strategies = ["semantic", "neighborhood"] - forward_chunks = 3 - backward_chunks = 3 - semantic_neighbors = 10 - semantic_similarity_threshold = 0.7 - generation_config = { model = "openai/gpt-4o-mini" } - -[database] -provider = "postgres" -batch_size = 256 - - [database.kg_creation_settings] - kg_entity_description_prompt = "graphrag_entity_description" - kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" - entity_types = [] # if empty, all entities are extracted - relation_types = [] # if empty, all relations are extracted - fragment_merge_count = 4 # number of fragments to merge into a single extraction - max_knowledge_triples = 100 - max_description_input_length = 65536 - generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for triplet extraction - - [database.kg_entity_deduplication_settings] - kg_entity_deduplication_type = "by_name" - kg_entity_deduplication_prompt = "graphrag_entity_deduplication" - max_description_input_length = 65536 - generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for deduplication - - [database.kg_enrichment_settings] - community_reports_prompt = "graphrag_community_reports" - max_summary_input_length = 65536 - generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering - leiden_params = {} - - [database.kg_search_settings] - entities_level = "document" # set to collection if you've run deduplication - map_system_prompt = "graphrag_map_system" - reduce_system_prompt = "graphrag_reduce_system" - generation_config = { model = "openai/gpt-4o-mini" } - -[logging] -provider = "r2r" -log_table = "logs" -log_info_table = "log_info" - -[orchestration] -provider = "simple" - - -[prompt] -provider = "r2r" -``` - - - - -```bash -r2r serve --config-name=local_llm -``` - - -``` toml -[agent] -system_instruction_name = "rag_agent" -tool_names = ["search"] - - [agent.generation_config] - model = "ollama/llama3.1" - -[completion] -provider = "litellm" -concurrent_request_limit = 1 - - [completion.generation_config] - model = "ollama/llama3.1" - temperature = 0.1 - top_p = 1 - max_tokens_to_sample = 1_024 - stream = false - add_generation_kwargs = { } - -[embedding] -provider = "ollama" -base_model = "mxbai-embed-large" -base_dimension = 1_024 -batch_size = 128 -add_title_as_prefix = true -concurrent_request_limit = 2 - -[orchestration] -provider = "simple" -``` - - - - -```bash -r2r serve --full --docker --config-name=full -``` - -``` toml -[ingestion] -provider = "unstructured_local" -strategy = "auto" -chunking_strategy = "by_title" -new_after_n_chars = 512 -max_characters = 1_024 -combine_under_n_chars = 128 -overlap = 256 - -[orchestration] -provider = "hatchet" -kg_creation_concurrency_lipmit = 32 -ingestion_concurrency_limit = 128 -kg_enrichment_concurrency_limit = 8 -``` - - - - - -## Ingesting files - -We begin the 
cookbook by ingesting the default sample file `aristotle.txt` used across R2R tutorials and cookbooks: - - - -```bash -r2r ingest-sample-file -# or -r2r ingest-files /path/to/your/files_or_directory - -# Example Response -[{'message': 'Ingestion task queued successfully.', 'task_id': '2b16bb55-4f47-4e66-a6bd-da9e215b9793', 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1'}] -``` - - - -```python -from r2r import R2RClient - -import requests -import tempfile -import os - -# URL of the raw content on GitHub -url = "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/aristotle.txt" - -# Fetch the content -response = requests.get(url) -content = response.text - -# Create a temporary file -with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.txt', prefix='aristotle_') as temp_file: - # Write the content to the temporary file - temp_file.write(content) - temp_file_path = temp_file.name - -client = R2RClient("http://localhost:7272") -client.ingest_files([temp_file_path]) - -# Example Response -[{'message': 'Ingestion task queued successfully.', 'task_id': '2b16bb55-4f47-4e66-a6bd-da9e215b9793', 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1'}] -``` - - - -The initial ingestion step parses the given documents and inserts them into R2R's relational and vector databases, enabling document management and semantic search over them. The `aristotle.txt` example file is typically ingested in under 10s. You can confirm ingestion is complete by querying the documents overview table: - -```bash -r2r documents-overview - -# Example Response -{'id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'type': 'txt', 'created_at': '2024-09-05T18:20:47.921933Z', 'updated_at': '2024-09-05T18:20:47.921938Z', 'ingestion_status': 'success', 'restructuring_status': 'pending', 'version': 'v0', 'collection_ids': [], 'metadata': {'version': 'v0'}} -``` - -When ingestion completes successfully for a given file, we will find that `ingestion_status` reads `success` in the corresponding output. You can also confirm in R2R's dashboard at http://localhost:7273 that the file has been ingested. - -![Ingested File](../images/kg_ingestion_status.png) - -## Create Knowledge Graph - -Knowledge graph creation is done in two steps: - -1. `create-graph`: Extracts nodes and relationships from your input document collection. -2. `enrich-graph`: Enhances the graph structure through clustering and explaining entities (commonly referred to as `GraphRAG`). - - - - -```bash -# Cost Estimation step. -# collection ID is optional. If you don't specify one, the default collection will be used. -r2r create-graph --collection-id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09 - -# This will run a cost estimation step to give you an estimate of the cost of the graph creation process. - -# Example Response -Time taken: 0.21 seconds -{ - "results": { - "message": "Ran Graph Creation Estimate (not the actual run). Note that these are estimated ranges, actual values may vary. To run the KG creation process, run `create-graph` with `--run` in the cli, or `run_type=\"run\"` in the client.", - "document_count": 2, - "number_of_jobs_created": 3, - "total_chunks": 29, - "estimated_entities": "290 - 580", - "estimated_triples": "362 - 870", - "estimated_llm_calls": "348 - 638", - "estimated_total_in_out_tokens_in_millions": "0 - 1", - "estimated_total_time_in_minutes": "Depends on your API key tier. Accurate estimate coming soon.
Rough estimate: 0.0 - 0.17", - "estimated_cost_in_usd": "0.0 - 0.06" - } -} - -# Then, you can run the graph creation process with: -r2r create-graph --collection-id= --run - -# Example response for R2R Light -[{'message': 'Graph created successfully, please run enrich-graph to enrich the graph for GraphRAG.'}] - -# Example Response for R2R Full. This call is non-blocking and returns immediately. We can check the status using the hatchet dashboard on http://localhost:7274. Details below: -[{'message': 'Graph creation task queued successfully.', 'task_id': 'd9dae1bb-5862-4a16-abaf-5297024df390'}] -``` - - - -```python -from r2r import R2RClient - -client = R2RClient("http://localhost:7272") -kg_search_settings = { 'run_mode': 'estimate' } -estimate = client.create_graph(collection_id=collection_id, kg_search_settings=kg_search_settings) -print(estimate) -# This will run a cost estimation step to give you an estimate of the cost of the graph creation process. - -# Example Response -Time taken: 0.21 seconds -{ - "results": { - "message": "These are estimated ranges, actual values may vary. To run the KG creation process, run `create-graph` with `--run` in the cli, or `run_mode=\"run\"` in the client.", - "document_count": 2, - "number_of_jobs_created": 3, - "total_chunks": 29, - "estimated_entities": "290 - 580", - "estimated_triples": "362 - 870", - "estimated_llm_calls": "348 - 638", - "estimated_total_in_out_tokens_in_millions": "0 - 1", - "estimated_total_time_in_minutes": "Depends on your API key tier. Accurate estimate coming soon. Rough estimate: 0.0 - 0.17", - "estimated_cost_in_usd": "0.0 - 0.06" - } -} - -# Then, you can run the graph creation process with: -kg_search_settings = { 'run_mode': 'run' } -client.create_graph(collection_id=collection_id, kg_search_settings=kg_search_settings) - -# Example response for R2R Light -[{'message': 'Graph created successfully, please run enrich-graph to enrich the graph for GraphRAG.'}] - -# Example Response for R2R Full. This call is non-blocking and returns immediately. We can check the status using the hatchet dashboard on http://localhost:7274. Details below: -[{'message': 'Graph creation task queued successfully.', 'task_id': 'd9dae1bb-5862-4a16-abaf-5297024df390'}] - - -``` - - - - -If you are using R2R Full, you can log into the hatchet dashboard on http://localhost:7274 (admin@example.com / Admin123!!) to check the status of the graph creation process. Please make sure all the `kg-extract-*` tasks are completed before running the enrich-graph step. - -![Hatchet Dashboard](../images/kg_extraction_progress.png) - - -This step will create a knowledge graph with nodes and relationships. You can get the entities and relationships in the graph using our dashboard on http://localhost:7273 or by calling the following API endpoints. These hit the /v2/entities and /v2/triples endpoints respectively. This will by default use the `entity_level=document` query parameter to get the entities and triples at the document level. We will set the default collection id to `122fdf6a-e116-546b-a8f6-e4cb2e2c0a09` when submitting requests to the endpoints below. - -- Entities: [Entities](/api-reference/endpoint/entities) -- Triples: [Triples](/api-reference/endpoint/entities) - -## Graph Enrichment - -Now we have a searchable graph, but this graph is not enriched yet. It does not have any community level information. We will now run the enrichment step. 
- -The graph enrichment step performs hierarchical leiden clustering to create communities, and embeds the descriptions. These embeddings will be used later in the local search stage of the pipeline. If you are more interested in the algorithm, please refer to the blog post [here](https://www.sciphi.ai/blog/graphrag). - - - -```bash -# collection ID is optional. If you don't specify one, the default collection will be used. -r2r enrich-graph --collection-id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09 - -# Similar to the graph creation step, this will run a cost estimation step to give you an estimate of the cost of the graph enrichment process. -Time taken: 0.22 seconds -{ - "results": { - "total_entities": 269, - "total_triples": 345, - "estimated_llm_calls": "26 - 53", - "estimated_total_in_out_tokens_in_millions": "0.05 - 0.11", - "estimated_total_time_in_minutes": "Depends on your API key tier. Accurate estimate coming soon. Rough estimate: 0.01 - 0.02", - "estimated_cost_in_usd": "0.0 - 0.01" - } -} - -# Now, you can run the graph enrichment process with: -r2r enrich-graph --collection-id=122fdf6a-e116-546b-a8f6-e4cb2e2c0a09 --run - -# Example Response with R2R Light -[{'message': 'Graph enriched successfully.'}] - -# Example Response with R2R Full, you can check the status using the hatchet dashboard on http://localhost:7274. -[{'message': 'Graph enrichment task queued successfully.', 'task_id': 'd9dae1bb-5862-4a16-abaf-5297024df390'}] -``` - - - -```python -from r2r import R2RClient - -client = R2RClient("http://localhost:7272") -kg_search_settings = { 'run_mode': 'estimate' } -collection_id = "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09" # optional, if not specified, the default collection will be used. -estimate = client.enrich_graph(collection_id=collection_id, kg_search_settings=kg_search_settings) -print(estimate) - -# This will run a cost estimation step to give you an estimate of the cost of the graph enrichment process. - -# Example Response -Time taken: 0.22 seconds -{ - "results": { - "total_entities": 269, - "total_triples": 345, - "estimated_llm_calls": "26 - 53", - "estimated_total_in_out_tokens_in_millions": "0.05 - 0.11", - "estimated_total_time_in_minutes": "Depends on your API key tier. Accurate estimate coming soon. Rough estimate: 0.01 - 0.02", - "estimated_cost_in_usd": "0.0 - 0.01" - } -} - -# Now, you can run the graph enrichment process with: -kg_search_settings = { 'run_mode': 'run' } -client.enrich_graph(collection_id=collection_id, kg_search_settings=kg_search_settings) - -# Example Response with R2R Light -[{'message': 'Graph enriched successfully.'}] - -# Example Response with R2R Full, you can check the status using the hatchet dashboard on http://localhost:7274. -[{'message': 'Graph enrichment task queued successfully.', 'task_id': 'd9dae1bb-5862-4a16-abaf-5297024df390'}] -``` - - - -If you're using R2R Full, you can similarly check that all `community-summary-*` tasks are completed before proceeding. - - -Now you can see that the graph is enriched with the following information. We have added descriptions and embeddings to the nodes and relationships. Also, each node is mapped to a community. Following is a visualization of the enriched graph (deprecated as of now. 
We are working on a new visualization tool): - -You can see the list of communities in the graph using the following API endpoint: - -- Communities: [Communities](/api-reference/endpoint/communities) - -## Search - -A knowledge graph search performs similarity search on the entity and community description embeddings. - -```bash - -r2r search --query="Who is Aristotle?" --use-kg-search - -# The answer will be returned in JSON format and contains results from entities, relationships and communities. Following is a snippet of the output: - -Vector search results: -[ - { - 'fragment_id': 'ecc754cd-380d-585f-84ac-021542ef3c1d', - 'extraction_id': '92d78034-8447-5046-bf4d-e019932fbc20', - 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'collection_ids': [], - 'score': 0.7393344796100582, - 'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n\nLittle is known about Aristotle's life. He was born in the city of Stagira in northern Greece during the Classical period. His father, Nicomachus, died when Aristotle was a child, and he was brought up by a guardian. At 17 or 18, he joined Plato's Academy in Athens and remained there until the age of 37 (c.\u2009347 BC). Shortly after Plato died, Aristotle left Athens and, at the request of Philip II of Macedon, tutored his son Alexander the Great beginning in 343 BC. He established a library in the Lyceum, which helped him to produce many of his hundreds of books on papyrus scrolls.\n\nThough Aristotle wrote many elegant treatises and dia ...", - 'metadata': {'title': 'aristotle.txt', 'version': 'v0', 'file_name': 'tmpm3ceiqs__aristotle.txt', 'chunk_order': 0, 'document_type': 'txt', 'size_in_bytes': 73353, 'unstructured_filetype': 'text/plain', 'unstructured_languages': ['eng'], 'partitioned_by_unstructured': True, 'associatedQuery': 'Who is Aristotle?'}} - }, ... -] - -KG search results: -{ - 'local_result': { - 'query': 'Who is Aristotle?', - 'entities': {'0': {'name': 'Aristotle', 'description': 'Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...'}}, - 'relationships': {}, - 'communities': {'0': {'summary': '```json\n{\n "title": "Aristotle and His Contributions",\n "summary": "The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. 
The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.",\n "rating": 9.5,\n "rating_explanation": "The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.",\n "findings": [\n {\n "summary": "Aristotle's Foundational Role in Logic",\n "explanation": "Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. His works compiled into a set of six bo ...}}}} - }, - 'global_result': None -} -Time taken: 2.39 seconds -``` - -# Conclusion - -In conclusion, integrating R2R with GraphRAG significantly enhances the capabilities of your RAG applications. By leveraging the power of graph-based knowledge representations, GraphRAG allows for more nuanced and context-aware information retrieval. This is evident in the example query we ran using R2R, which not only retrieved relevant information but also provided a structured analysis of the key contributions of Aristotle to modern society. - -In essence, combining R2R with GraphRAG empowers your RAG applications to deliver more intelligent, context-aware, and insightful responses, making it a powerful tool for advanced information retrieval and analysis tasks. - -Feel free to reach out to us at founders@sciphi.ai if you have any questions or need further assistance. - - -# Advanced GraphRAG Techniques - -If you want to learn more about the advanced techniques that we use in GraphRAG, please refer to the [Advanced GraphRAG Techniques](/cookbooks/advanced-graphrag) page. diff --git a/docs/cookbooks/hybrid-search.mdx b/docs/cookbooks/hybrid-search.mdx deleted file mode 100644 index 934fdd3a8..000000000 --- a/docs/cookbooks/hybrid-search.mdx +++ /dev/null @@ -1,146 +0,0 @@ ---- -title: 'Hybrid Search' -description: 'Learn how to implement and use hybrid search with R2R' -icon: 'magnifying-glass' ---- - -## Introduction - -R2R's hybrid search combines traditional keyword-based searching with modern semantic understanding, providing more accurate and contextually relevant results. This approach is particularly effective for complex queries where both specific terms and overall meaning are crucial. - -## How R2R Hybrid Search Works - -1. **Full-Text Search**: Utilizes PostgreSQL's full-text search with `ts_rank_cd` and `websearch_to_tsquery`. -2. **Semantic Search**: Performs vector similarity search using the query's embedded representation. -3. **Reciprocal Rank Fusion (RRF)**: Merges results from full-text and semantic searches using the formula: - ``` - COALESCE(1.0 / (rrf_k + full_text.rank_ix), 0.0) * full_text_weight + - COALESCE(1.0 / (rrf_k + semantic.rank_ix), 0.0) * semantic_weight - ``` -4. **Result Ranking**: Orders final results based on the combined RRF score. 
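
To make the fusion step concrete, here is a small illustrative sketch of Reciprocal Rank Fusion in Python. It mirrors the formula above but is not R2R's internal SQL implementation; the function name and its inputs are hypothetical.

```python
# Illustrative sketch of Reciprocal Rank Fusion (RRF), mirroring the SQL formula above.
# Inputs map a result id to its 1-based rank in the full-text or semantic result list.
def rrf_merge(
    full_text_ranks: dict[str, int],
    semantic_ranks: dict[str, int],
    full_text_weight: float = 1.0,
    semantic_weight: float = 1.0,
    rrf_k: int = 50,
) -> list[tuple[str, float]]:
    scores: dict[str, float] = {}
    for result_id in set(full_text_ranks) | set(semantic_ranks):
        # COALESCE(..., 0.0): a result missing from one list contributes nothing there.
        ft = 1.0 / (rrf_k + full_text_ranks[result_id]) if result_id in full_text_ranks else 0.0
        sem = 1.0 / (rrf_k + semantic_ranks[result_id]) if result_id in semantic_ranks else 0.0
        scores[result_id] = ft * full_text_weight + sem * semantic_weight
    # Step 4: order final results by the combined RRF score.
    return sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
```

Because each contribution is on the order of 1/(rrf_k + rank), the combined RRF scores are small fractions, which is why the hybrid search results shown later report scores near 0.02 rather than raw cosine similarities.
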
- -## Key Features - -### Full-Text Search - -The full-text search component incorporates: - -- PostgreSQL's `tsvector` for efficient text searching -- `websearch_to_tsquery` for parsing user queries -- `ts_rank_cd` for ranking full-text search results - -### Semantic Search - -The semantic search component uses: - -- Vector embeddings for storing and querying semantic representations -- Cosine similarity for measuring the relevance of documents to the query - -## Configuration - -### VectorSearchSettings - -Key settings for vector search configuration: - -```python -class VectorSearchSettings(BaseModel): - use_hybrid_search: bool - search_limit: int - filters: dict[str, Any] - hybrid_search_settings: Optional[HybridSearchSettings] - # ... other settings -``` - -### HybridSearchSettings - -Specific parameters for hybrid search: - -```python -class HybridSearchSettings(BaseModel): - full_text_weight: float - semantic_weight: float - full_text_limit: int - rrf_k: int -``` - -## Usage Example - -```python -from r2r import R2RClient - -client = R2RClient() - -vector_settings = { - "use_hybrid_search": True, - "search_limit": 20, - # Can add logical filters, as shown: - # "filters":{"category": {"$eq": "technology"}}, - "hybrid_search_settings" : { - "full_text_weight": 1.0, - "semantic_weight": 5.0, - "full_text_limit": 200, - "rrf_k": 50 - } -} - -results = client.search( - query="Who was Aristotle?", - vector_search_settings=vector_settings -) -print(results) -``` - -## Results Comparison - -### Basic Vector Search - -```json -{ - "results": { - "vector_search_results": [ - { - "score": 0.780314067545999, - "text": "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath...", - "metadata": { - "title": "aristotle.txt", - "version": "v0", - "chunk_order": 0 - } - }, ... - ] - } -} -``` - -### Hybrid Search with RRF - -```json -{ - "results": { - "vector_search_results": [ - { - "score": 0.0185185185185185, - "text": "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath...", - "metadata": { - "title": "aristotle.txt", - "version": "v0", - "chunk_order": 0, - "semantic_rank": 1, - "full_text_rank": 3 - }, ... - } - ] - } -} -``` - -## Best Practices - -1. Optimize PostgreSQL indexing for both full-text and vector searches -2. Regularly update search indices -3. Monitor performance and adjust weights as needed -4. Use appropriate vector dimensions and embedding models for your use case - -## Conclusion - -R2R's hybrid search offers a powerful solution for complex information retrieval needs, combining the strengths of keyword matching and semantic understanding. Its flexible configuration and use of Reciprocal Rank Fusion make it adaptable to a wide range of use cases, from technical documentation to broad, context-dependent queries. diff --git a/docs/cookbooks/ingestion.mdx b/docs/cookbooks/ingestion.mdx deleted file mode 100644 index 2fa4c209b..000000000 --- a/docs/cookbooks/ingestion.mdx +++ /dev/null @@ -1,167 +0,0 @@ ---- -title: 'Ingestion Cookbook' -description: 'Learn how to ingest, update, and delete documents with R2R' -icon: 'file-arrow-up' ---- - -## Introduction - -R2R provides a powerful and flexible ingestion pipeline that allows you to efficiently process and manage various types of documents. 
This cookbook will guide you through the process of ingesting files, updating existing documents, and deleting documents using the R2R Python SDK. - - - -As of version `3.2.13`, we have expanded the options for ingesting files using multimodal foundation models. In addition to using such models by default for images, R2R can now use them on PDFs by passing the following in your ingestion configuration: - -```json -"ingestion_config": { - ..., - "parser_overrides": { - "pdf": "zerox" - } -} -``` - -We recommend this method for achieving the highest quality ingestion results. - - - - -## Ingesting Files - -To ingest files into your R2R system, you can use the `ingest_files` method from the Python SDK: - -```python -file_paths = ['path/to/file1.txt', 'path/to/file2.txt'] -metadatas = [{'key1': 'value1'}, {'key2': 'value2'}] - -ingest_response = client.ingest_files( - file_paths=file_paths, - metadatas=metadatas, - ingestion_config={ - "provider": "unstructured_local", - "strategy": "auto", - "chunking_strategy": "by_title", - "new_after_n_chars": 256, - "max_characters": 512, - "combine_under_n_chars": 64, - "overlap": 100, - } -) -``` - -The `ingest_files` method accepts the following parameters: - -- `file_paths` (required): A list of file paths or directory paths to ingest. -- `metadatas` (optional): A list of metadata dictionaries corresponding to each file. -- `document_ids` (optional): A list of document IDs to assign to the ingested files. -- `ingestion_config` (optional): Custom ingestion settings to override the default configuration, which you can read more about [here](/documentation/configuration/ingestion/overview). - -## Ingesting Chunks - -If you have pre-processed chunks of text, you can directly ingest them using the `ingest_chunks` method: - -```python -chunks = [ - {"text": "This is the first chunk."}, - {"text": "This is the second chunk."} -] - -ingest_response = client.ingest_chunks( - chunks=chunks, - document_id="custom_document_id", - metadata={"custom_metadata": "value"}, -) -``` - -The `ingest_chunks` method accepts the following parameters: - -- `chunks` (required): A list of dictionaries containing the text and metadata for each chunk. -- `document_id` (optional): A custom document ID to assign to the ingested chunks. -- `metadata` (optional): Additional metadata to associate with the ingested chunks. - -## Updating Files - -To update existing documents in your R2R system, you can use the `update_files` method: - -```python -file_paths = ['path/to/updated_file1.txt', 'path/to/updated_file2.txt'] -document_ids = ['document1_id', 'document2_id'] - -update_response = client.update_files( - file_paths=file_paths, - document_ids=document_ids, - metadatas=[{"version": "2.0"}, {"version": "1.5"}], -) -``` - -The `update_files` method accepts the following parameters: - -- `file_paths` (required): A list of file paths for the updated documents. -- `document_ids` (required): A list of document IDs corresponding to the files being updated. -- `metadatas` (optional): A list of metadata dictionaries to update for each document. 
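
After an update, you can confirm that the new files and metadata were registered by listing your documents. The snippet below is a minimal, illustrative check; the field names follow the `documents_overview` output shown elsewhere in these cookbooks, and the IDs are placeholders.

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

# Illustrative check that the updated documents were registered (IDs are placeholders).
document_ids = ['document1_id', 'document2_id']

overview = client.documents_overview()
for doc in overview["results"]:
    if doc["document_id"] in document_ids:
        print(doc["document_id"], doc["version"], doc["status"], doc["metadata"])
```
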
- - -## Updating Chunks - -To update specific chunks within existing documents in your R2R deployment, you can use the `update_chunks` method: - -```python -document_id = "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" -extraction_id = "aeba6400-1bd0-5ee9-8925-04732d675434" - -update_response = client.update_chunks( - document_id=document_id, - extraction_id=extraction_id, - text="Updated chunk content with new information...", - metadata={ - "source": "manual_edit", - "edited_at": "2024-10-24", - "editor": "John Doe" - } -) -``` - -The `update_chunks` method accepts the following parameters: - -- `document_id` (required): The ID of the document containing the chunk you want to update. -- `extraction_id` (required): The ID of the specific chunk you want to update. -- `text` (required): The new text content that will replace the existing chunk text. -- `metadata` (optional): A metadata dictionary that will replace the existing chunk metadata. -- `run_with_orchestration` (optional): Whether to run the update through orchestration (default: true). - -This method is particularly useful when you need to: -- Correct errors in specific chunks -- Update outdated information -- Add or modify metadata for individual chunks -- Make targeted changes without reprocessing entire documents - -Note that updating chunks will trigger a re-vectorization of the modified content, ensuring that your vector search capabilities remain accurate with the updated information. - - -## Deleting Documents and Chunks - -To delete documents or chunks from your R2R deployment, you can use the `delete` method: - -```python -# For documents -delete_response = client.delete( - { - "document_id": {"$eq": "document1_id"} - } -) - -# For chunks -delete_response = client.delete( - { - "extraction_id": {"$eq": "extraction1_id"} - } -) -``` - -The `delete` method accepts a dictionary specifying the filters to identify the documents to delete. In this example, it deletes the document with the ID "document1_id" and the chunk with the ID "extraction1_id." - -## Conclusion - -R2R's ingestion pipeline provides a flexible and efficient way to process, update, and manage your documents. By utilizing the `ingest_files`, `ingest_chunks`, `update_files`, and `delete` methods from the Python SDK, you can seamlessly integrate document management capabilities into your applications. - -For more detailed information on the available parameters and response formats, refer to the [Python SDK Ingestion Documentation](/documentation/python-sdk/ingestion). diff --git a/docs/cookbooks/maintenance.mdx b/docs/cookbooks/maintenance.mdx deleted file mode 100644 index e1521867d..000000000 --- a/docs/cookbooks/maintenance.mdx +++ /dev/null @@ -1,263 +0,0 @@ ---- -title: 'Maintenance & Scaling' -description: 'Learn how to maintain and scale your R2R system' -icon: 'paint-roller' ---- - -This guide covers essential maintenance tasks for R2R deployments, with a focus on vector index management and system updates. Understanding when and how to build vector indices, as well as keeping your R2R installation current, is crucial for maintaining optimal performance at scale. - -## Vector Indices -### Do You Need Vector Indices? - -Vector indices are **not necessary for all deployments**, especially in multi-user applications where each user typically queries their own subset of documents. 
Consider that: - -- In multi-user applications, queries are usually filtered by user_id, drastically reducing the actual number of vectors being searched -- A system with 1 million total vectors but 1000 users might only search through 1000 vectors per query -- Performance impact of not having indices is minimal when searching small per-user document sets - -Only consider implementing vector indices when: -- Individual users are searching across hundreds of thousands of documents -- Query latency becomes a bottleneck even with user-specific filtering -- You need to support cross-user search functionality at scale - -For development environments or smaller deployments, the overhead of maintaining vector indices often outweighs their benefits. - -### Vector Index Management - -R2R supports multiple indexing methods, with HNSW (Hierarchical Navigable Small World) being recommended for most use cases: - -```python -# Create vector index -create_response = client.create_vector_index( - table_name="vectors", - index_method="hnsw", - index_measure="cosine_distance", - index_arguments={ - "m": 16, # Number of connections per element - "ef_construction": 64 # Size of dynamic candidate list - }, - concurrently=True -) - -# List existing indices -indices = client.list_vector_indices(table_name="vectors") - -# Delete an index -delete_response = client.delete_vector_index( - index_name="ix_vector_cosine_ops_hnsw__20241021211541", - table_name="vectors", - concurrently=True -) -``` - -#### Important Considerations - -1. **Pre-warming Requirement** - - New indices start "cold" and require warming for optimal performance - - Initial queries will be slower until the index is loaded into memory - - Consider implementing explicit pre-warming in production - - Warming must be repeated after system restarts - -2. **Resource Usage** - - Index creation is CPU and memory intensive - - Memory usage scales with both dataset size and `m` parameter - - Consider creating indices during off-peak hours - -3. **Performance Tuning** - - HNSW Parameters: - - `m`: 16-64 (higher = better quality, more memory) - - `ef_construction`: 64-100 (higher = better quality, longer build time) - - Distance Measures: - - `cosine_distance`: Best for normalized vectors (most common) - - `l2_distance`: Better for absolute distances - - `max_inner_product`: Optimized for dot product similarity - -## System Updates and Maintenance - -### Version Management - -Check your current R2R version: -```bash -r2r version -``` - -### Update Process - -1. **Prepare for Update** - ```bash - # Check current versions - r2r version - r2r db current - - # Generate system report (optional) - r2r generate-report - ``` - -2. **Stop Running Services** - ```bash - r2r docker-down - ``` - -3. **Update R2R** - ```bash - r2r update - ``` - -4. **Update Database** - ```bash - r2r db upgrade - ``` - -5. **Restart Services** - ```bash - r2r serve --docker [additional options] - ``` - -### Database Migration Management - -R2R uses database migrations to manage schema changes. 
Always check and update your database schema after updates: - -```bash -# Check current migration -r2r db current - -# Apply migrations -r2r db upgrade -``` - -### Managing Multiple Environments - -Use different project names and schemas for different environments: - -```bash -# Development -export R2R_PROJECT_NAME=r2r_dev -r2r serve --docker --project-name r2r-dev - -# Staging -export R2R_PROJECT_NAME=r2r_staging -r2r serve --docker --project-name r2r-staging - -# Production -export R2R_PROJECT_NAME=r2r_prod -r2r serve --docker --project-name r2r-prod -``` - -## Troubleshooting - -If issues occur: - -1. Generate a system report: - ```bash - r2r generate-report - ``` - -2. Check container health: - ```bash - r2r docker-down - r2r serve --docker - ``` - -3. Review database state: - ```bash - r2r db current - r2r db history - ``` - -4. Roll back if needed: - ```bash - r2r db downgrade --revision - ``` - - -## Scaling Strategies - -### Horizontal Scaling - -For applications serving many users: - -1. **Load Balancing** - - Deploy multiple R2R instances behind a load balancer - - Each instance can handle a subset of users - - Particularly effective since most queries are user-specific - -2. **Sharding** - - Consider sharding by user_id for large multi-user deployments - - Each shard handles a subset of users - - Maintains performance even with millions of total documents - -### Vertical Scaling - -For applications requiring large single-user searches: - -1. **Cloud Provider Solutions** - - AWS RDS supports up to 1 billion vectors per instance - - Scale up compute and memory resources as needed - - Example instance types: - - `db.r6g.16xlarge`: Suitable for up to 100M vectors - - `db.r6g.metal`: Can handle 1B+ vectors - -2. **Memory Optimization** - ```python - # Optimize for large vector collections - client.create_vector_index( - table_name="vectors", - index_method="hnsw", - index_arguments={ - "m": 32, # Increased for better performance - "ef_construction": 80 # Balanced for large collections - } - ) - ``` - -### Multi-User Considerations - -1. **Filtering Optimization** - ```python - # Efficient per-user search - response = client.search( - "query", - vector_search_settings={ - "search_filters": { - "user_id": {"$eq": "current_user_id"} - } - } - ) - ``` - -2. **Collection Management** - - Group related documents into collections - - Enable efficient access control - - Optimize search scope - -3. **Resource Allocation** - - Monitor per-user resource usage - - Implement usage quotas if needed - - Consider dedicated instances for power users - - -### Performance Monitoring - -Monitor these metrics to inform scaling decisions: - -1. **Query Performance** - - Average query latency per user - - Number of vectors searched per query - - Cache hit rates - -2. **System Resources** - - Memory usage per instance - - CPU utilization - - Storage growth rate - -3. 
**User Patterns** - - Number of active users - - Query patterns and peak usage times - - Document count per user - - -## Additional Resources - -- [Python SDK Ingestion Documentation](/documentation/python-sdk/ingestion) -- [CLI Maintenance Documentation](/documentation/cli/maintenance) -- [Ingestion Configuration Documentation](/documentation/configuration/ingestion/overview) diff --git a/docs/cookbooks/observability.mdx b/docs/cookbooks/observability.mdx deleted file mode 100644 index cbc3e1b5f..000000000 --- a/docs/cookbooks/observability.mdx +++ /dev/null @@ -1,294 +0,0 @@ ---- -title: 'Analytics & Observability' -description: 'Learn how to use analytics and and logging with R2R' -icon: 'telescope' ---- - - -## Introduction - -This guide demonstrates how to leverage R2R's powerful analytics and logging features. These capabilities allow you to monitor system performance, track usage patterns, and gain valuable insights into your RAG application's behavior. - - -The features described in this cookbook are typically restricted to superusers. Ensure you have the necessary permissions before attempting to access these features. - - -For more information on user roles and permissions, including how to set up and manage superuser accounts, please refer to our [User Auth Cookbook](/cookbooks/user-auth). - -## Setup - -Before diving into the authentication features, ensure you have R2R installed and configured as described in the [installation guide](/documentation/installation). For this guide, we'll use the default configuration. Further, `r2r serve` must be called to serve R2R in either your local environment or local Docker engine. - -## Basic Usage - -### Logging - -R2R automatically logs various events and metrics during its operation. - - -To fetch our logs using the client-server architecture, use the following: - - - - -```bash -r2r logs -``` - - - -```python -client.logs() -``` - - - -```javascript -client.logs() -``` - - - - -```bash -curl -X POST http://localhost:7272/v2/logs \ - -H "Content-Type: application/json" \ - -d '{ - "log_type_filter": null, - "max_runs_requested": 100 - }' -``` - - - - -Expected Output: -```python -[ - { - 'run_id': UUID('27f124ad-6f70-4641-89ab-f346dc9d1c2f'), - 'run_type': 'rag', - 'entries': [ - {'key': 'search_query', 'value': 'Who is Aristotle?'}, - {'key': 'search_latency', 'value': '0.39'}, - {'key': 'search_results', 'value': '["{\\"id\\":\\"7ed3a01c-88dc-5a58-a68b-6e5d9f292df2\\",...}"]'}, - {'key': 'rag_generation_latency', 'value': '3.79'}, - {'key': 'llm_response', 'value': 'Aristotle (Greek: Ἀριστοτέλης Aristotélēs; 384–322 BC) was...'} - ] - }, - # More log entries... -] -``` - -These logs provide detailed information about each operation, including search results, queries, latencies, and LLM responses. - - -To fetch the logs directly from an instantiated R2R object: - -```python -app = R2R() - -# Perform some searches / RAG completions -# ... 
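# For example (illustrative only; any search or RAG call will add log entries):
# app.rag("Who is Aristotle?", GenerationConfig(model="openai/gpt-4o-mini"))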
- -# Get the latest logs -logs = app.logs() -print(logs) -``` -### Analytics - -R2R offers an analytics feature that allows you to aggregate and analyze log data: - - -The relevant command - - -```bash -r2r analytics --filters '{"search_latencies": "search_latency"}' --analysis-types '{"search_latencies": ["basic_statistics", "search_latency"]}' -``` - - - -```python -client.analytics( - {"search_latencies": "search_latency"}, - {"search_latencies": ["basic_statistics", "search_latency"]} -) -``` - - - -```javascript -const filterCriteria = { - filters: { - search_latencies: "search_latency", - }, - }; - - const analysisTypes = { - search_latencies: ["basic_statistics", "search_latency"], - }; - - client.analytics(filterCriteria, analysisTypes); -``` - - - - -```bash -curl -X POST http://localhost:7272/v2/analytics \ - -H "Content-Type: application/json" \ - -d '{ - "filter_criteria": { - "filters": { - "search_latencies": "search_latency" - } - }, - "analysis_types": - { - "analysis_types": { - "search_latencies": ["basic_statistics", "search_latency"] - } - } - }' -``` - - - - - - -Expected Output: -```python -{ - 'results': { - 'filtered_logs': { - 'search_latencies': [ - { - 'timestamp': '2024-06-20 21:29:06', - 'log_id': UUID('0f28063c-8b87-4934-90dc-4cd84dda5f5c'), - 'key': 'search_latency', - 'value': '0.66', - 'rn': 3 - }, - ... - ] - }, - 'search_latencies': { - 'Mean': 0.734, - 'Median': 0.523, - 'Mode': 0.495, - 'Standard Deviation': 0.213, - 'Variance': 0.0453 - } - } -} -``` - - -To fetch the analytics directly from an instantiated R2R object: - -```python -from r2r import FilterCriteria, AnalysisTypes - -filter_criteria = FilterCriteria(filters={"search_latencies": "search_latency"}) -analysis_types = AnalysisTypes(analysis_types={"search_latencies": ["basic_statistics", "search_latency"]}) - -analytics_results = app.analytics(filter_criteria, analysis_types) -print(analytics_results) -``` -The boilerplate analytics implementation allows you to: - 1. Filter logs based on specific criteria - 2. Perform statistical analysis on various metrics (e.g., search latencies) - 3. Track performance trends over time - 4. Identify potential bottlenecks or areas for optimization - - -## Experimental Features - - -Advanced analytics features are still in an experimental state - please reach out to the R2R team if you are interested in configuring / using these additional features. - - -### Custom Analytics - -R2R's analytics system is flexible and allows for custom analysis. You can specify different filters and analysis types to focus on specific aspects of your application's performance. 
- -```python -# Analyze RAG latencies -rag_filter = FilterCriteria(filters={"rag_latencies": "rag_generation_latency", "rag_eval": "rag_eval_metric"}) -rag_analysis = AnalysisTypes(analysis_types={"rag_latencies": ["basic_statistics", "rag_generation_latency"]}) -rag_analytics = app.analytics(rag_filter, rag_analysis) - -# Track usage patterns by user -user_filter = FilterCriteria(filters={"user_patterns": "user_id"}) -user_analysis = AnalysisTypes(analysis_types={"user_patterns": ["bar_chart", "user_id"]}) -user_analytics = app.analytics(user_filter, user_analysis) - -# Monitor error rates -error_filter = FilterCriteria(filters={"error_rates": "error"}) -error_analysis = AnalysisTypes(analysis_types={"error_rates": ["basic_statistics", "error"]}) -error_analytics = app.analytics(error_filter, error_analysis) -``` - -### Preloading Data for Analysis - -To get meaningful analytics, you need a substantial amount of data. Here's a script to preload your database with random searches: - -```python -import random -from r2r import R2R, GenerationConfig - -app = R2R() - -# List of sample queries -queries = [ - "What is artificial intelligence?", - "Explain machine learning.", - "How does natural language processing work?", - "What are neural networks?", - "Describe deep learning.", - # Add more queries as needed -] - -# Perform random searches -for _ in range(1000): - query = random.choice(queries) - app.rag(query, GenerationConfig(model="openai/gpt-4o-mini")) - -print("Preloading complete. You can now run analytics on this data.") -``` - -After running this script, you'll have a rich dataset to analyze using the analytics features described above. - -### User-Level Analytics - -To get analytics for a specific user: - -```python -user_id = "your_user_id_here" - -user_filter = FilterCriteria(filters={"user_analytics": "user_id"}) -user_analysis = AnalysisTypes(analysis_types={ - "user_analytics": ["basic_statistics", "user_id"], - "user_search_latencies": ["basic_statistics", "search_latency"] -}) - -user_analytics = app.analytics(user_filter, user_analysis) -print(f"Analytics for user {user_id}:") -print(user_analytics) -``` - -This will give you insights into the behavior and performance of specific users in your system. - -## Summary - -R2R's logging and analytics features provide powerful tools for understanding and optimizing your RAG application. By leveraging these capabilities, you can: - -- Monitor system performance in real-time -- Analyze trends in search and RAG operations -- Identify potential bottlenecks or areas for improvement -- Track user behavior and usage patterns -- Make data-driven decisions to enhance your application's performance and user experience - -For detailed setup and basic functionality, refer back to the [R2R Quickstart](/documentation/quickstart). For more advanced usage and customization options, join the [R2R Discord community](https://discord.gg/p6KqD2kjtB). diff --git a/docs/cookbooks/orchestration.mdx b/docs/cookbooks/orchestration.mdx deleted file mode 100644 index 63d83e13c..000000000 --- a/docs/cookbooks/orchestration.mdx +++ /dev/null @@ -1,89 +0,0 @@ ---- -title: 'Orchestration' -description: 'Learn how orchestration is handled inside R2R' -icon: 'music' ---- - -## Introduction to orchestration - -R2R uses [Hatchet](https://docs.hatchet.run/home) for orchestrating complex workflows, particularly for ingestion and knowledge graph construction processes. 
- -Hatchet is a distributed, fault-tolerant task queue that solves scaling problems like concurrency, fairness, and rate limiting. It allows R2R to distribute functions between workers with minimal configuration. - -### Key Concepts - -1. **Workflows**: Sets of functions executed in response to external triggers. -2. **Workers**: Long-running processes that execute workflow functions. -3. **Managed Queue**: Low-latency queue for handling real-time tasks. - -## Orchestration in R2R - - -### Benefits of orchestration - -1. **Scalability**: Efficiently handles large-scale tasks. -2. **Fault Tolerance**: Built-in retry mechanisms and error handling. -3. **Flexibility**: Easy to add or modify workflows as R2R's capabilities expand. - -### Workflows in R2R - -1. **IngestFilesWorkflow**: Handles file ingestion, parsing, chunking, and embedding. -2. **UpdateFilesWorkflow**: Manages the process of updating existing files. -3. **KgExtractAndStoreWorkflow**: Extracts and stores knowledge graph information. -4. **CreateGraphWorkflow**: Orchestrates the creation of knowledge graphs. -5. **EnrichGraphWorkflow**: Handles graph enrichment processes like node creation and clustering. - - -## Orchestration GUI - -By default, the R2R Docker ships with with Hatchet's front-end application on port 7274. This can be accessed by navigating to `http://localhost:7274`. - -You may login with the following credentials: - - - - -**Email:** admin@example.com - -**Password:** Admin123!! - - -### Login - - - - - - -### Running Tasks - -The panel below shows the state of the Hatchet workflow panel at `http://localhost:7274/workflow-runs` immediately after calling `r2r ingest-sample-files`: - - - - - - -### Inspecting a workflow - -You can inspect a workflow within Hatchet and can even attempt to retry the job from directly in the GUI in the case of failure: - - - - - - - -### Long running tasks - -Hatchet supports long running tasks, which is very useful during knowledge graph construction: - - - - - - - -## Coming Soon - -In the coming day(s) / week(s) we will further highlight the available feature set and best practices for orchestrating your ingestion workflows inside R2R. diff --git a/docs/cookbooks/remote-cloud.mdx b/docs/cookbooks/remote-cloud.mdx deleted file mode 100644 index 85c29dbf1..000000000 --- a/docs/cookbooks/remote-cloud.mdx +++ /dev/null @@ -1,9 +0,0 @@ ---- -title: 'Remote Cloud' -description: 'Learn how to deploy R2R into remote cloud environments' -icon: 'cloud' ---- - -1. [**Deploy with Azure**](/documentation/deployment/azure) -2. [**Deploy with GCP**](/documentation/deployment/gcp) -3. [**Deploy with AWS**](/documentation/deployment/aws) diff --git a/docs/cookbooks/user-auth.mdx b/docs/cookbooks/user-auth.mdx deleted file mode 100644 index 0c47b72f4..000000000 --- a/docs/cookbooks/user-auth.mdx +++ /dev/null @@ -1,266 +0,0 @@ ---- -title: 'User Auth' -description: 'A comprehensive guide to user authentication and management features in R2R' -icon: 'key' ---- - -## Introduction - -R2R provides a complete set of user authentication and management features, allowing developers to implement secure and feature-rich authentication systems, or to integrate directly with their authentication provider of choice. - -[Refer here](/documentation/deep-dive/providers/auth) for documentation on the available authentication provider options built into R2R, or [refer here](/api-reference/endpoint/register) for available auth API reference. 
- - - -When authentication is not required (require_authentication is set to false, which is the default in `r2r.toml`), unauthenticated requests will default to using the credentials of the default admin user. - -This behavior ensures that operations can proceed smoothly in development or testing environments where authentication may not be enforced, but it should be used with caution in production settings. - - - -## Setup - -Before diving into the authentication features, ensure you have R2R installed and configured as described in the [installation guide](/documentation/installation). For this guide, we'll use the default configuration. Further, `r2r serve` must be called to serve R2R in either your local environment or local Docker engine. - -## Basic Usage - -### User Registration and Login - -Let's start by registering a new user and logging in: - -```python core/examples/scripts/run_auth_workflow.py -from r2r import R2RClient - -client = R2RClient("http://localhost:7272") # Replace with your R2R deployment URL - -# Register a new user -user_result = client.register("user1@test.com", "password123") -# {'results': {'email': 'user1@test.com', 'id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'hashed_password': '$2b$12$p6a9glpAQaq.4uzi4gXQru6PN7WBpky/xMeYK9LShEe4ygBf1L.pK', 'is_superuser': False, 'is_active': True, 'is_verified': False, 'verification_code_expiry': None, 'name': None, 'bio': None, 'profile_picture': None, 'created_at': '2024-07-16T22:53:47.524794Z', 'updated_at': '2024-07-16T22:53:47.524794Z'}} - -# Login immediately (assuming email verification is disabled) -login_result = client.login("user1@test.com", "password123") -# {'results': {'access_token': {'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ1c2VyMUB0ZXN0LmNvbSIsImV4cCI6MTcyMTE5OTI0Ni44Nzc2NTksInRva2VuX3R5cGUiOiJhY2Nlc3MifQ.P4RcCkCe0H5UHPHak7tRovIgyQcql4gB8NlqdDDk50Y', 'token_type': 'access'}, 'refresh_token': {'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ1c2VyMUB0ZXN0LmNvbSIsImV4cCI6MTcyMTc3NTI0NiwidG9rZW5fdHlwZSI6InJlZnJlc2gifQ.VgfZ4Lhz0f2GW41NYv6KLrMCK3CdGmGVug7eTQp0xPU', 'token_type': 'refresh'}}} -``` - -This code snippet demonstrates the basic user registration and login process. The `register` method creates a new user account, while the `login` method authenticates the user and returns access and refresh tokens. In the example above, it was assumed that email verification was disabled. - -### Email Verification (Optional) - -If email verification is enabled in your R2R configuration, you'll need to verify the user's email before they can log in: - -```python -verify_result = client.verify_email("verification_code_here") -# {"results": {"message": "Email verified successfully"}} -``` - -### Token Refresh - -After logging in, you gain immediate access to user information such as general account details, documents overview, and utility functions like token refresh: - -```python -refresh_result = client.refresh_access_token() -# {'results': {'access_token': {'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ1c2VyMUB0ZXN0LmNvbSIsImV4cCI6MTcyMTIwMTk3Mi4zMzI4NTIsInRva2VuX3R5cGUiOiJhY2Nlc3MifQ.Ze9A50kefndAtu2tvcvMCiilFfAhrOV0l5A7RZgPvBY', 'token_type': 'access'}, 'refresh_token': {'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ1c2VyMUB0ZXN0LmNvbSIsImV4cCI6MTcyMTc3Nzk3MiwidG9rZW5fdHlwZSI6InJlZnJlc2gifQ.NwzFH8e2tKO0bH1Hdm_eq39VqmGPf7xSNOOhDlKFQFQ', 'token_type': 'refresh'}}} -``` - - -## Document Management - -R2R allows users to manage their documents securely. 
Here's how to ingest and search a given users documents: - -### Ingesting Documents - -```python -import os - -# Ingest a sample file for the logged-in user -script_path = os.path.dirname(__file__) -sample_file = os.path.join(script_path, "..", "data", "aristotle.txt") -ingestion_result = client.ingest_files([sample_file]) -# {'results': {'processed_documents': ["Document 'aristotle.txt' processed successfully."], 'failed_documents': [], 'skipped_documents': []}} -``` - -### User Document Overview -```python -documents_overview = client.documents_overview() -# {'results': [{'document_id': '6ab698c6-e494-5441-a740-49395f2b1881', 'version': 'v0', 'size_in_bytes': 73353, 'metadata': {}, 'status': 'success', 'user_id': 'ba0c75eb-0b21-4eb1-a902-082476e5e972', 'title': 'aristotle.txt', 'created_at': '2024-07-16T16:25:27.634411Z', 'updated_at': '2024-07-16T16:25:27.634411Z'}]} -``` -### Search & RAG - -```python -search_result = client.search(query="Sample search query") -# {'results': {'vector_search_results': [{ ... 'metadata': {'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 0, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '781ce9e6-9e73-5012-8445-35b7d84f161c', 'score': 0.670799394202279, 'metadata': {'text': "Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent", 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 8, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': 'f32cda7c-2538-5248-b0b6-4d0d45cc4d60', 'score': 0.667974928858889, 'metadata': {'text': 'Aristotle was revered among medieval Muslim scholars as "The First Teacher", and among medieval Christians like Thomas Aquinas as simply "The Philosopher", while the poet Dante called him "the master of those who know". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle\'s influence on logic continued well into the 19th century. 
In addition, his ethics, although always influential, gained renewed interest with', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 5, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': 'e6592fd5-e02e-5847-b158-79bbdd8710a2', 'score': 0.6647597950983339, 'metadata': {'text': "Little is known about Aristotle's life. He was born in the city of Stagira in northern Greece during the Classical period. His father, Nicomachus, died when Aristotle was a child, and he was brought up by a guardian. At 17 or 18, he joined Plato's Academy in Athens and remained there until the age of 37 (c.\u2009347 BC). Shortly after Plato died, Aristotle left Athens and, at the request of Philip II of Macedon, tutored his son Alexander the Great beginning in 343 BC. He established a library in the Lyceum,", 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 1, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '8c72faca-6d98-5129-b9ee-70769272e361', 'score': 0.6476034942146001, 'metadata': {'text': 'Among countless other achievements, Aristotle was the founder of formal logic,[146] pioneered the study of zoology, and left every future scientist and philosopher in his debt through his contributions to the scientific method.[2][147][148] Taneli Kukkonen, observes that his achievement in founding two sciences is unmatched, and his reach in influencing "every branch of intellectual enterprise" including Western ethical and political theory, theology, rhetoric, and literary analysis is equally long. As a', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 175, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '3ce904cc-5835-551a-a85c-f00be1a5e8dc', 'score': 0.626156434278918, 'metadata': {'text': 'Aristotle has been called the father of logic, biology, political science, zoology, embryology, natural law, scientific method, rhetoric, psychology, realism, criticism, individualism, teleology, and meteorology.[151]', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 177, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '6a15b09b-4bf1-5c1f-af24-fe659c8a011d', 'score': 0.624521989361129, 'metadata': {'text': 'after friends and relatives, and to deal with the latter as with beasts or plants".[13] By 335 BC, Aristotle had returned to Athens, establishing his own school there known as the Lyceum. Aristotle conducted courses at the school for the next twelve years. While in Athens, his wife Pythias died and Aristotle became involved with Herpyllis of Stagira. They had a son whom Aristotle named after his father, Nicomachus. 
If the Suda – an uncritical compilation from the Middle Ages – is accurate, he may also have', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 16, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '19a755d0-770f-5c6f-991e-ca191a40c8d6', 'score': 0.614493374720815, 'metadata': {'text': "passed to Plato's nephew Speusippus, although it is possible that he feared the anti-Macedonian sentiments in Athens at that time and left before Plato died.[10] Aristotle then accompanied Xenocrates to the court of his friend Hermias of Atarneus in Asia Minor. After the death of Hermias, Aristotle travelled with his pupil Theophrastus to the island of Lesbos, where together they researched the botany and zoology of the island and its sheltered lagoon. While in Lesbos, Aristotle married Pythias, either", 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 12, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '33b2dbd7-2f3a-5450-9618-976a996bde2a', 'score': 0.6117302824500019, 'metadata': {'text': 'Transmission\nFurther information: List of writers influenced by Aristotle\nMore than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. According to the philosopher Bryan Magee, "it is doubtful whether any human being has ever known as much as he did".[145]', 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 174, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}, {'id': '2d101d42-6317-5d8c-85c3-fb9b6d947c68', 'score': 0.610827455968717, 'metadata': {'text': "The immediate influence of Aristotle's work was felt as the Lyceum grew into the Peripatetic school. Aristotle's students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle's influence over Alexander the Great is seen in the latter's bringing with him on his expedition a host of zoologists, botanists, and researchers. He had also learned a great deal about Persian customs and traditions from his", 'title': 'aristotle.txt', 'user_id': 'bf417057-f104-4e75-8579-c74d26fcbed3', 'version': 'v0', 'chunk_order': 181, 'document_id': 'a2645197-d07f-558d-ba55-f7a60eb29621', 'extraction_id': 'b7bbd497-311a-5dc8-8a51-79e2208739e0', 'associatedQuery': 'Who was Aristotle'}}], 'kg_search_results': []}} - -rag_result = client.rag(query="Sample search query") -# {'results': {'completion': {'id': 'chatcmpl-9llkGYsrG1YZaWkqYvzXr1eQNl0gA', 'choices': [{'finish_reason': 'stop', 'index': 0, 'message': {'content': 'The search results for the query "Sample search query" include various topics and excerpts related to Aristotle\'s works and other subjects. Here are the relevant references:\n\n1. **Categories of Aristotle\'s Works**:\n - On Interpretation [1], [2]\n - Prior Analytics [1], [2]\n - Posterior Analytics [1], [2]\n - Topics [1], [2]\n - On Sophistical Refutations [1], [2]\n\n2. 
**Aristotle\'s Theory on Sense Perceptions and Memory**:\n - Aristotle\'s belief that people receive sense perceptions and perceive them as impressions, leading to the weaving together of new experiences. The search for these impressions involves searching the memory itself, where recollection occurs when one retrieved experience naturally follows another [3], [4].\n\n3. **Medieval Judaism**:\n - References to Medieval Judaism [5], [6].\n\n4. **Scientific Style**:\n - References to Scientific Style [7], [8].\n\n5. **Recovery of Texts by Apellicon**:\n - Apellicon\'s efforts to recover degraded texts by copying them into new manuscripts and using guesswork to fill in unreadable gaps [9], [10].\n\nThese references provide a broad overview of the topics related to the query, including Aristotle\'s works, his theories on memory, Medieval Judaism, scientific style, and the recovery of ancient texts.', 'role': 'assistant'}}], 'created': 1721171976, 'model': 'gpt-4o-2024-05-13', 'object': 'chat.completion', 'system_fingerprint': 'fp_5e997b69d8', 'usage': {'completion_tokens': 286, 'prompt_tokens': 513, 'total_tokens': 799}}, 'search_results': {'vector_search_results': [{'id': 'd70e2776-befa-5b67-9da7-b76aedb7c101', 'score': 0.270276627830369, 'metadata': {'text': 'Categories\nOn Interpretation\nPrior Analytics\nPosterior Analytics\nTopics\nOn Sophistical Refutations', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 26, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': 'f54c9cda-0053-5ea2-a22b-aaba6437518c', 'score': 0.270276627830369, 'metadata': {'text': 'Categories\nOn Interpretation\nPrior Analytics\nPosterior Analytics\nTopics\nOn Sophistical Refutations', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 26, 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': 'd0675bcd-23d1-5982-8114-1a6459faec3f', 'score': 0.242980153623792, 'metadata': {'text': 'Because Aristotle believes people receive all kinds of sense perceptions and perceive them as impressions, people are continually weaving together new impressions of experiences. To search for these impressions, people search the memory itself.[105] Within the memory, if one experience is offered instead of a specific memory, that person will reject this experience until they find what they are looking for. Recollection occurs when one retrieved experience naturally follows another. If the chain of', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 119, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': '69aed771-061f-5360-90f1-0ce395601b98', 'score': 0.242980153623792, 'metadata': {'text': 'Because Aristotle believes people receive all kinds of sense perceptions and perceive them as impressions, people are continually weaving together new impressions of experiences. To search for these impressions, people search the memory itself.[105] Within the memory, if one experience is offered instead of a specific memory, that person will reject this experience until they find what they are looking for. 
Recollection occurs when one retrieved experience naturally follows another. If the chain of', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 119, 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': 'dadd2d48-a2b7-5e55-9a8c-1030712c5ca0', 'score': 0.20218510005651702, 'metadata': {'text': 'Medieval Judaism', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 202, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': 'da81f692-40d9-599b-a69b-25b6a5179b47', 'score': 0.20218510005651702, 'metadata': {'text': 'Medieval Judaism', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 202, 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': '0c4fea20-f7ee-520f-ae1f-155ecb398e1f', 'score': 0.19056136124594703, 'metadata': {'text': 'Scientific style', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 92, 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}, {'id': 'c3c3145a-5d9d-5362-9629-f9159a027a9d', 'score': 0.19051768949311598, 'metadata': {'text': 'Scientific style', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 92, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': '63e3a252-90bd-5494-9f9f-aee772f4db54', 'score': 0.18900877964391904, 'metadata': {'text': 'Apellicon sought to recover the texts, many of which were seriously degraded at this point due to the conditions in which they were stored. He had them copied out into new manuscripts, and used his best guesswork to fill in the gaps where the originals were unreadable.[216]:\u200a5–6', 'title': 'aristotle.txt', 'user_id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'version': 'v0', 'chunk_order': 228, 'document_id': '4bb1e5e0-3bb3-54e0-bc71-69e68bce30c7', 'extraction_id': '9401dfe6-10dd-5eb1-8b88-de1927a6c556', 'associatedQuery': 'Sample search query'}}, {'id': '2c1183a8-e130-5432-a311-ee1f0f194562', 'score': 0.18894388145542895, 'metadata': {'text': 'Apellicon sought to recover the texts, many of which were seriously degraded at this point due to the conditions in which they were stored. 
He had them copied out into new manuscripts, and used his best guesswork to fill in the gaps where the originals were unreadable.[216]:\u200a5–6', 'title': 'aristotle.txt', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'version': 'v0', 'chunk_order': 228, 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'extraction_id': 'bc497a0c-4b17-5e86-97d4-aa06474e0e5b', 'associatedQuery': 'Sample search query'}}], 'kg_search_results': None}}} -``` - -## Advanced Authentication Features - -R2R offers several advanced authentication features to enhance security and user experience: - -### Password Management - -Users can change their passwords and request password resets: - -```python -# Change password -change_password_result = client.change_password("password123", "new_password") -# {"result": {"message": "Password changed successfully"}} - -# Request password reset -reset_request_result = client.request_password_reset("user@example.com") -# {"result": {"message": "If the email exists, a reset link has been sent"}} - -# Confirm password reset (after user receives reset token) -reset_confirm_result = client.confirm_password_reset("reset_token_here", "new_password") -# {"result": {"message": "Password reset successfully"}} -``` - -### User Profile Management - -Users can view and update their profiles: - -```python -# Get user profile -profile = client.user() -# {'results': {'email': 'user1@test.com', 'id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'hashed_password': 'null', 'is_superuser': False, 'is_active': True, 'is_verified': True, 'verification_code_expiry': None, 'name': None, 'bio': None, 'profile_picture': None, 'created_at': '2024-07-16T23:06:42.123303Z', 'updated_at': '2024-07-16T23:22:48.256239Z'}} - -# Update user profile -update_result = client.update_user(name="John Doe", bio="R2R enthusiast") -# {'results': {'email': 'user1@test.com', 'id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', 'hashed_password': 'null', 'is_superuser': False, 'is_active': True, 'is_verified': True, 'verification_code_expiry': None, 'name': 'John Doe', 'bio': 'R2R enthusiast', 'profile_picture': None, 'created_at': '2024-07-16T23:06:42.123303Z', 'updated_at': '2024-07-16T23:22:48.256239Z'}} -``` - -### Account Deletion - -Users can delete their accounts: - -```python -# Delete account (requires password confirmation) -user_id = register_response["results"]["id"] # input unique id here -delete_result = client.delete_user(user_id, "password123") -# {'results': {'message': 'User account deleted successfully'}} -``` - -### Logout - -To end a user session: - -```python -logout_result = client.logout() -print(f"Logout Result:\n{logout_result}") -# {'results': {'message': 'Logged out successfully'}} -``` - -## Superuser Capabilities and Default Admin Creation - -R2R includes powerful superuser capabilities and a mechanism for default admin creation, which are crucial for system management and initial setup. Let's explore these features: - -### Superuser Capabilities - -Superusers in R2R have elevated privileges that allow them to perform system-wide operations and access sensitive information. Some key superuser capabilities include: - -1. **User Management**: Superusers can view, modify, and delete user accounts. -2. **System-wide Document Access**: They can access and manage documents across all users. -3. **Analytics and Observability**: Superusers have access to system-wide analytics and logs. -4. **Configuration Management**: They can modify system configurations and settings. 
- -To use superuser capabilities, you need to authenticate as a superuser. The methods for accessing these features are the same as regular user methods, but with expanded scope and permissions. - -### Default Admin Creation - -R2R automatically creates a default admin user during initialization. This process is handled by the `R2RAuthProvider` class. Here's how it works: - -1. During system initialization, R2R attempts to create a default admin user. -2. The admin email and password are typically set through environment variables or configuration files. -3. If the admin user already exists, R2R logs this information and continues without creating a duplicate. - -The relevant part of the configuration that affects this process is: - -```toml -[auth] -provider = "r2r" -access_token_lifetime_in_minutes = 60 -refresh_token_lifetime_in_days = 7 -require_authentication = true -require_email_verification = false -default_admin_email = "admin@example.com" -default_admin_password = "change_me_immediately" -``` - -- With `"require_authentication": false`, the system allows unauthenticated access for testing and development. In a production environment, this should be set to `true`. -- `"require_email_verification": false` means that email verification is not required for new users, including the default admin. For increased security in production, consider enabling this. - -### Accessing Superuser Features - -To access superuser features, you need to authenticate as the default admin or another user with superuser privileges. Here's an example of how to do this: - -```python - -from r2r import R2RClient - -client = R2RClient("http://localhost:7272") - -# Login as admin -login_result = client.login("admin@example.com", "change_me_immediately") - -# Now you can access superuser features, for example: -users_overview = client.users_overview() -# {'results': [{'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'num_files': 2, 'total_size_in_bytes': 73672, 'document_ids': ['c4967f03-1780-5161-8e1d-57b55aa65076', '9fbe403b-c11c-5aae-8ade-ef22980c3ad1']}, {'user_id': 'ac730ec3-7d3d-451a-a166-e7ac7c57b198', 'num_files': 1, 'total_size_in_bytes': 73353, 'document_ids': ['d4861e78-cf02-5184-9b6a-d5bdbddd39b2']}, {'user_id': 'e0514342-e51a-43e5-8aaa-665468102dce', 'num_files': 1, 'total_size_in_bytes': 73353, 'document_ids': ['f4fbe534-b7d6-5fec-9d41-9093b2112732']}]} - -# Access system-wide logs -logs = client.logs() -# {'results': [{'run_id': '645cb90f-c281-4188-b93b-94fb383170f6', 'run_type': 'search', 'entries': [{'key': 'search_latency', 'value': '0.43'}, { ... 
- -# Perform analytics -analytics_result = client.analytics( - {"all_latencies": "search_latency"}, - {"search_latencies": ["basic_statistics", "search_latency"]} -) -# {'results': {'filtered_logs': {'search_latencies': [{'timestamp': '2024-07-18 18:10:26', 'log_id': '645cb90f-c281-4188-b93b-94fb383170f6', 'key': 'search_latency', 'value': '0.43', 'rn': 3}, {'timestamp': '2024-07-18 18:04:54', 'log_id': 'a22d6a4c-3e68-4f01-b129-c9cbb5ae0b86', 'key': 'search_latency', 'value': '0.76', 'rn': 3}, {'timestamp': '2024-07-18 17:45:04', 'log_id': '253aa7b2-5abc-46d4-9bc3-f1ee47598bc5', 'key': 'search_latency', 'value': '0.43', 'rn': 3}, {'timestamp': '2024-07-18 17:44:47', 'log_id': 'add3d166-392c-44e0-aec2-d00d97a584f9', 'key': 'search_latency', 'value': '0.71', 'rn': 3}, {'timestamp': '2024-07-18 17:43:40', 'log_id': '9b64d038-aa56-44c9-af5e-96091fff62f2', 'key': 'search_latency', 'value': '0.44', 'rn': 3}, {'timestamp': '2024-07-18 17:43:20', 'log_id': '2a433a26-dc5b-4460-823a-58f01070e44d', 'key': 'search_latency', 'value': '0.37', 'rn': 3}, {'timestamp': '2024-07-18 17:43:16', 'log_id': '71a05fb2-5993-45c3-af3d-5019a745a33d', 'key': 'search_latency', 'value': '0.72', 'rn': 3}, {'timestamp': '2024-07-16 23:19:35', 'log_id': 'fada5559-ccd1-42f3-81a1-c96dcbc2ff08', 'key': 'search_latency', 'value': '0.34', 'rn': 3}, {'timestamp': '2024-07-16 23:07:32', 'log_id': '530bc25c-efc9-4b10-b46f-529c64c89fdf', 'key': 'search_latency', 'value': '0.62', 'rn': 3}, {'timestamp': '2024-07-16 23:07:14', 'log_id': '5c977046-bd71-4d95-80ba-dcf16e33cfd5', 'key': 'search_latency', 'value': '0.72', 'rn': 3}, {'timestamp': '2024-07-16 23:06:44', 'log_id': 'a2bef456-7370-4977-83cf-a60de5abd0cf', 'key': 'search_latency', 'value': '0.44', 'rn': 3}, {'timestamp': '2024-07-16 23:02:10', 'log_id': 'ee8d8bb4-7bc5-4c92-a9a2-c3ddd9f6bcd9', 'key': 'search_latency', 'value': '0.53', 'rn': 3}, {'timestamp': '2024-07-16 22:00:48', 'log_id': '13701f93-a2ab-4182-abd0-a401aa8e720a', 'key': 'search_latency', 'value': '0.52', 'rn': 3}, {'timestamp': '2024-07-16 21:59:17', 'log_id': '17bd24d7-37e2-4838-9830-c92110f492c7', 'key': 'search_latency', 'value': '0.30', 'rn': 3}, {'timestamp': '2024-07-16 21:59:16', 'log_id': 'd1d1551b-80cc-4f93-878a-7d7579aa3b9e', 'key': 'search_latency', 'value': '0.43', 'rn': 3}, {'timestamp': '2024-07-16 21:55:46', 'log_id': '0291254e-262f-4d9d-ace2-b98c2eaae547', 'key': 'search_latency', 'value': '0.42', 'rn': 3}, {'timestamp': '2024-07-16 21:55:45', 'log_id': '74df032f-e9d6-4ba9-ae0e-1be58927c2b1', 'key': 'search_latency', 'value': '0.57', 'rn': 3}, {'timestamp': '2024-07-16 21:54:36', 'log_id': '413982fd-3588-42cc-8009-8c686a85f27e', 'key': 'search_latency', 'value': '0.55', 'rn': 3}, {'timestamp': '2024-07-16 03:35:48', 'log_id': 'ae79062e-f4f0-4fb1-90f4-4ddcb8ae0cc4', 'key': 'search_latency', 'value': '0.50', 'rn': 3}, {'timestamp': '2024-07-16 03:26:10', 'log_id': '9fd51a36-9fdf-4a70-89cb-cb0d43dd0b63', 'key': 'search_latency', 'value': '0.41', 'rn': 3}, {'timestamp': '2024-07-16 03:01:18', 'log_id': '6cb79d2e-c431-447e-bbbb-99f96d56784e', 'key': 'search_latency', 'value': '0.67', 'rn': 3}, {'timestamp': '2024-07-16 01:29:44', 'log_id': '34eea2d3-dd98-47fb-ae3b-05e1001850a5', 'key': 'search_latency', 'value': '0.42', 'rn': 3}, {'timestamp': '2024-07-16 01:29:25', 'log_id': '5204d260-4ad3-49ce-9b38-1043ceae65ac', 'key': 'search_latency', 'value': '0.58', 'rn': 3}]}, 'search_latencies': {'Mean': 0.517, 'Median': 0.5, 'Mode': 0.43, 'Standard Deviation': 0.132, 'Variance': 0.018}}} - -``` - -### 
Security Considerations for Superusers - -When using superuser capabilities, keep the following security considerations in mind: - -1. **Limit Superuser Access**: Only grant superuser privileges to trusted individuals who require full system access. -2. **Use Strong Passwords**: Ensure that superuser accounts, especially the default admin, use strong, unique passwords. -3. **Enable Authentication and Verification**: In production, set `"require_authentication": true` and `"require_email_verification": true` for enhanced security. -4. **Audit Superuser Actions**: Regularly review logs of superuser activities to detect any unusual or unauthorized actions. -5. **Rotate Credentials**: Periodically update superuser credentials, including the default admin password. - -By understanding and properly managing superuser capabilities and default admin creation, you can ensure secure and effective administration of your R2R deployment. - -## Security Considerations - -When implementing user authentication, consider the following security best practices: - -1. **Use HTTPS**: Always use HTTPS in production to encrypt data in transit. -2. **Implement rate limiting**: Protect against brute-force attacks by limiting login attempts. -3. **Use secure password hashing**: R2R uses bcrypt for password hashing by default, which is a secure choice. -4. **Implement multi-factor authentication (MFA)**: Consider adding MFA for an extra layer of security. -5. **Regular security audits**: Conduct regular security audits of your authentication system. - -## Customizing Authentication - -R2R's authentication system is flexible and can be customized to fit your specific needs: - -1. **Custom user fields**: Extend the User model to include additional fields. -2. **OAuth integration**: Integrate with third-party OAuth providers for social login. -3. **Custom password policies**: Implement custom password strength requirements. -4. **User roles and permissions**: Implement a role-based access control system. - -## Troubleshooting - -Here are some common issues and their solutions: - -1. **Login fails after registration**: Ensure email verification is completed if enabled. -2. **Token refresh fails**: Check if the refresh token has expired; the user may need to log in again. -3. **Unable to change password**: Verify that the current password is correct. - -## Conclusion - -R2R provides a comprehensive set of user authentication and management features, allowing developers to create secure and user-friendly applications. By leveraging these capabilities, you can implement robust user authentication, document management, and access control in your R2R-based projects. - -For more advanced use cases or custom implementations, refer to the R2R documentation or reach out to the community for support. diff --git a/docs/cookbooks/walkthrough.mdx b/docs/cookbooks/walkthrough.mdx deleted file mode 100644 index 96d540466..000000000 --- a/docs/cookbooks/walkthrough.mdx +++ /dev/null @@ -1,1483 +0,0 @@ ---- -title: 'Walkthrough' -description: 'A detailed step-by-step cookbook of the core features provided by R2R.' -icon: 'traffic-light-slow' ---- - - -This guide shows how to use R2R to: - - 1. Ingest files into R2R - 2. Search over ingested files - 3. Use your data as input to RAG (Retrieval-Augmented Generation) - 4. Perform basic user auth - 5. Observe and analyze an R2R deployment - - -Be sure to complete the [installation instructions](/documentation/installation) before continuing with this guide. 
- - -## Introduction - -R2R is an engine for building user-facing Retrieval-Augmented Generation (RAG) applications. At its core, R2R provides this service through an architecture of providers, services, and an integrated RESTful API. This cookbook provides a detailed walkthrough of how to interact with R2R. [Refer here](/documentation/deep-dive/main/introduction) for a deeper dive on the R2R system architecture. - -## R2R Application Lifecycle - -The following diagram illustrates how R2R assembles a user-facing application: - -```mermaid -flowchart LR - Developer[Developer] - Config[R2RConfig] - R2R[R2R Application] - SDK[R2R SDK] - User[User] - - Developer -->|Customizes| Config - Config -->|Configures| R2R - Developer -->|Deploys| R2R - Developer -->|Implements| SDK - SDK -->|Interfaces with| R2R - User -->|Interacts via| SDK -``` - - -### Hello R2R - -R2R gives developers configurable vector search and RAG right out of the box, as well as direct method calls instead of the client-server architecture seen throughout the docs: -```python core/examples/hello_r2r.py -from r2r import R2RClient - -client = R2RClient("http://localhost:7272") - -with open("test.txt", "w") as file: - file.write("John is a person that works at Google.") - -client.ingest_files(file_paths=["test.txt"]) - -# Call RAG directly -rag_response = client.rag( - query="Who is john", - rag_generation_config={"model": "openai/gpt-4o-mini", "temperature": 0.0}, -) -results = rag_response["results"] -print(f"Search Results:\n{results['search_results']}") -print(f"Completion:\n{results['completion']}") -``` - -### Configuring R2R -R2R is highly configurable. To customize your R2R deployment: - -1. Create a local configuration file named `r2r.toml`. -2. In this file, override default settings as needed. - -For example: -```toml r2r.toml -[completion] -provider = "litellm" -concurrent_request_limit = 16 - - [completion.generation_config] - model = "openai/gpt-4o" - temperature = 0.5 - -[ingestion] -provider = "r2r" -chunking_strategy = "recursive" -chunk_size = 1_024 -chunk_overlap = 512 -excluded_parsers = ["mp4"] -``` - -Then, use the `config-path` argument to specify your custom configuration when launching R2R: - -```bash -r2r serve --docker --config-path=r2r.toml -``` - -You can read more about [configuration here](/documentation/configuration). - - -## Document Ingestion and Management - -R2R efficiently handles diverse document types using Postgres with pgvector, combining relational data management with vector search capabilities. This approach enables seamless ingestion, storage, and retrieval of multimodal data, while supporting flexible document management and user permissions. - -Key features include: - -- Unique `document_id` generation for each ingested file -- User and collection permissions through `user_id` and `collection_ids` -- Document versioning for tracking changes over time -- Granular access to document content through chunk retrieval -- Flexible deletion and update mechanisms - - - Note, all document management commands are gated at the user level, with the exception of superusers. - - - - -R2R offers a powerful data ingestion process that handles various file types including `html`, `pdf`, `png`, `mp3`, and `txt`. The full list of supported filetypes is available [here](/documentation/configuration/ingestion/overview). The ingestion process parses, chunks, embeds, and stores documents efficiently with a fully asynchronous pipeline. 
To demonstrate this functionality: - - - -```bash -r2r ingest-sample-files -``` - - - -```python -from r2r import R2RClient -from glob import glob - -client = R2RClient("http://localhost:7272") -files = glob.glob('path/to/r2r/examples/data') -client.ingest_files(files) -``` - - - - -```javascript -const files = [ - { path: "r2r/examples/data/aristotle.txt", name: "aristotle.txt" }, -]; - -await client.ingestFiles(files, { - metadatas: [{ title: "aristotle.txt" }], - user_ids: [ - "123e4567-e89b-12d3-a456-426614174000", - ], - skip_document_info: false, -}); -``` - - - - -This command initiates the ingestion process, producing output similar to: - -```bash -[{'message': 'Ingestion task queued successfully.', 'task_id': '6e27dfca-606d-422d-b73f-2d9e138661b4', 'document_id': '28a7266e-6cee-5dd2-b7fa-e4fc8f2b49c6'}, {'message': 'Ingestion task queued successfully.', 'task_id': 'd37deef1-af08-4576-bd79-6d2a7fb6ec33', 'document_id': '2c91b66f-e960-5ff5-a482-6dd0a523d6a1'}, {'message': 'Ingestion task queued successfully.', 'task_id': '4c1240f0-0692-4b67-8d2b-1428f71ea9bc', 'document_id': '638f0ed6-e0dc-5f86-9282-1f7f5243d9fa'}, {'message': 'Ingestion task queued successfully.', 'task_id': '369abcea-79a2-480c-9ade-bbc89f5c500e', 'document_id': 'f25fd516-5cac-5c09-b120-0fc841270c7e'}, {'message': 'Ingestion task queued successfully.', 'task_id': '7c99c168-97ee-4253-8a6f-694437f3e5cb', 'document_id': '77f67c65-6406-5076-8176-3844f3ef3688'}, {'message': 'Ingestion task queued successfully.', 'task_id': '9a6f94b0-8fbc-4507-9435-53e0973aaad0', 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1'}, {'message': 'Ingestion task queued successfully.', 'task_id': '61d0e2e0-45ec-43db-9837-ff4da5166ee9', 'document_id': '0032a7a7-cb2a-5d08-bfc1-93d3b760deb4'}, {'message': 'Ingestion task queued successfully.', 'task_id': '1479390e-c295-47b0-a570-370b05b86c8b', 'document_id': 'f55616fb-7d48-53d5-89c2-15d7b8e3834c'}, {'message': 'Ingestion task queued successfully.', 'task_id': '92f73a07-2286-4c42-ac02-d3eba0f252e0', 'document_id': '916b0ed7-8440-566f-98cf-ed7c0f5dba9b'}] -``` - -Key features of the ingestion process: -1. Unique `document_id` generation for each file -2. Metadata association, including `user_id` and `collection_ids` for document management -3. Efficient parsing, chunking, and embedding of diverse file types - - - - -R2R allows retrieval of high-level document information stored in a relational table within the Postgres database. To fetch this information: - - - - - - -```bash -r2r documents-overview -``` - - - -```python -client.documents_overview() -``` - - - -```javascript -await client.documentsOverview() -``` - - - -```bash -curl -X POST http://localhost:7272/v2/documents_overview \ - -H "Content-Type: application/json" \ - -d '{ - "document_ids": null, - "user_ids": null - }' -``` - - - - -This command returns document metadata, including: - -```bash -[ - { - 'id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', - 'title': 'aristotle.txt', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'type': 'txt', - 'created_at': '2024-09-06T03:32:02.991742Z', - 'updated_at': '2024-09-06T03:32:02.991744Z', - 'ingestion_status': 'success', - 'restructuring_status': 'pending', - 'version': 'v0', - 'collection_ids': [], - 'metadata': {'title': 'aristotle.txt', 'version': 'v0'} - } - ... -] -``` - -This overview provides quick access to document versions, sizes, and associated metadata, facilitating efficient document management. 
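
Because ingestion is queued asynchronously (the ingest call above only returns task IDs), one practical use of this overview is to poll it until newly ingested documents reach a terminal state. The helper below is a minimal sketch, not an SDK method: it assumes the list-of-dicts response shape and the `ingestion_status` values shown in the example output above.

```python
import time

def wait_for_ingestion(client, document_ids, timeout_s=120, poll_s=2):
    """Poll documents_overview until the given documents finish ingesting."""
    deadline = time.time() + timeout_s
    pending = set(document_ids)
    while pending and time.time() < deadline:
        for doc in client.documents_overview():
            if doc["id"] in pending and doc["ingestion_status"] == "success":
                pending.discard(doc["id"])
        if pending:
            time.sleep(poll_s)
    return not pending  # True if every document finished within the timeout
```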
- - - - - -R2R enables retrieval of specific document chunks and associated metadata. To fetch chunks for a particular document by id: - - - - - -```bash -r2r document-chunks --document-id=9fbe403b-c11c-5aae-8ade-ef22980c3ad1 -``` - - - -```python -client.document_chunks("9fbe403b-c11c-5aae-8ade-ef22980c3ad1") -``` - - - -```javascript -await client.documentChunks("9fbe403b-c11c-5aae-8ade-ef22980c3ad1"), -``` - - - -```bash -curl -X POST http://localhost:7272/v2/document_chunks \ - -H "Content-Type: application/json" \ - -d '{ - "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" - }' -``` - - - - - -This command returns detailed chunk information: - -```bash -[ - { - 'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.', - 'title': 'aristotle.txt', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'version': 'v0', - 'chunk_order': 0, - 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', - 'extraction_id': 'aeba6400-1bd0-5ee9-8925-04732d675434', - 'fragment_id': 'f48bcdad-4155-52a4-8c9d-8ba06e996ba3', - }, - ... -] -``` -These features allow for granular access to document content. - - - -R2R supports flexible document deletion through a method that can run arbitrary deletion filters. To delete a document by its ID: - - - - - -```bash -r2r delete --filter=document_id:eq:9fbe403b-c11c-5aae-8ade-ef22980c3ad1 -``` - - - -```python -client.delete( - { - "document_id": - {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"} - } -) -``` - - - -```javascript -await client.delete(["document_id"], ["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"]); -``` - - - - -```bash -curl -X DELETE http://localhost:7272/v2/delete \ - -H "Content-Type: application/json" \ - -d '{ - "keys": ["document_id"], - "values": ["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"] - }' -``` - - - - - -This command produces output similar to: - -```bash -{results: {}} -``` - -Key features of the deletion process: -1. Deletion by document ID, extraction ID, or other. -2. Cascading deletion of associated chunks and metadata -3. Confirmation of successful deletion - -This flexible deletion mechanism ensures precise control over document management within the R2R system. - - - - R2R provides robust document update capabilities through two main endpoints: `update_documents` and `update_files`. These endpoints allow for seamless updating of existing documents while maintaining version control. - - Key features of the update process: - - 1. **Automatic versioning**: When updating a document, R2R automatically increments the version (e.g., from "v0" to "v1"). - - 2. **Metadata preservation**: The update process maintains existing metadata while allowing for updates. - - 3. **Content replacement**: The new document content completely replaces the old content in the order shown below - - Ingest the new version of the document - - Delete the old version - - Executing the command below will update one of the sample documents ingested earlier. 
- - - - ```bash - r2r update-files core/examples/data/aristotle_v2.txt --document-ids=9fbe403b-c11c-5aae-8ade-ef22980c3ad1 - ``` - - - - - ```python - file_paths = ["/path/to/r2r/examples/data/aristotle_v2.txt"] - document_ids = ["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"] - client.update_files(file_paths, document_ids) - ``` - - - - ```javascript - const updated_file = [ - { path: "/path/to/r2r/examples/data/aristotle_v2.txt", name: "aristotle_v2.txt" }, - ]; - await client.updateFiles(updated_file, { - document_ids: ["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"], - metadatas: [{ title: "aristotle_v2.txt" }], - }), - ``` - - - - ```bash - curl -X POST http://localhost:7272/v2/update_files \ - -H "Content-Type: multipart/form-data" \ - -F "file_paths=@/path/to/your/r2r/examples/data/aristotle_v2.txt" \ - -F 'document_ids=["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"]' - ``` - - - **Expected Output:** - - ```bash - { - "results": { - "message": "Update task queued successfully.", - "task_id": "00fc8484-179f-47db-a474-d81b95d80cf2", - "document_ids": [ - "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" - ] - } - } - ``` - - Behind the scenes, this command utilizes the `update_files` endpoint. The process involves: - - 1. Reading the new file content - 2. Incrementing the document version - 3. Ingesting the new version with updated metadata - 4. Deleting the old version of the document - - For programmatic updates, you can use the RESTful API endpoint `/update_files`. This endpoint accepts a `R2RUpdateFilesRequest`, which includes: - - - `files`: List of UploadFile objects containing the new document content - - `document_ids`: UUIDs of the documents to update - - `metadatas`: Optional updated metadata for each document - - The update process ensures data integrity and maintains a clear history of document changes through versioning. - - - - -For more advanced document management techniques and user authentication details, refer to [the user auth cookbook](/cookbooks/user-auth). - - -Certainly! I'll rewrite the AI Powered Search section without using dropdowns, presenting it as a continuous, detailed explanation of R2R's search capabilities. Here's the revised version: - -## AI Powered Search - -R2R offers powerful and highly configurable search capabilities, including vector search, hybrid search, and knowledge graph-enhanced search. These features allow for more accurate and contextually relevant information retrieval. - - -### Vector Search - -Vector search inside of R2R is highly configurable, allowing you to fine-tune your search parameters for optimal results. Here's how to perform a basic vector search: - - - - -```python -r2r search --query="What was Uber's profit in 2020?" 
-``` - - - - -```python -client.search("What was Uber's profit in 2020?", { - "index_measure": "l2_distance", # default is `cosine_distance` - "search_limit": 25, -}) -``` - - - -```javascript -await client.search("What was Uber's profit in 2020?", true, {}, 10, false, { - indexMeasure: "cosine_distance", - includeValues: true, - includeMetadatas: true, - probes: 10, - efSearch: 40 -}); -``` - - - -```bash -curl -X POST http://localhost:7272/v2/search \ - -H "Content-Type: application/json" \ - -d '{ - "query": "What was Uber'\''s profit in 2020?", - "vector_search_settings": { - "use_vector_search": true, - "index_measure": "cosine_distance", - "search_limit": 10, - "include_values": true, - "include_metadatas": true, - "probes": 10, - "ef_search": 40 - } - }' -``` - - - - - - ```json - { 'results': - {'vector_search_results': - [ - { - 'fragment_id': 'ab6d0830-6101-51ea-921e-364984bfd177', - 'extraction_id': '429976dd-4350-5033-b06d-8ffb67d7e8c8', - 'document_id': '26e0b128-3043-5674-af22-a6f7b0e54769', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'collection_ids': [], - 'score': 0.285747126074015, - 'text': 'Net\n loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of ourATG\n Business to Aurora, a $1.6 billion pre-tax net benefit relating to Ubers equity investments, as well as reductions in our fixed cost structure and increasedvariable cost effi\nciencies. Net loss attributable to Uber Technologies, Inc. also included $1.2 billion of stock-based compensation expense.Adjusted', - 'metadata': {'title': 'uber_2021.pdf', 'version': 'v0', 'chunk_order': 5, 'associatedQuery': "What was Uber's profit in 2020?"} - }, - ... - ] - } - } - ``` - - -Key configurable parameters for vector search include: - -- `use_vector_search`: Enable or disable vector search. -- `index_measure`: Choose between "cosine_distance", "l2_distance", or "max_inner_product". -- `search_limit`: Set the maximum number of results to return. -- `include_values`: Include search score values in the results. -- `include_metadatas`: Include element metadata in the results. -- `probes`: Number of ivfflat index lists to query (higher increases accuracy but decreases speed). -- `ef_search`: Size of the dynamic candidate list for HNSW index search (higher increases accuracy but decreases speed). - -### Hybrid Search - -R2R supports hybrid search, which combines traditional keyword-based search with vector search for improved results. Here's how to perform a hybrid search: - - - - -```python -r2r search --query="What was Uber's profit in 2020?" 
--use-hybrid-search -``` - - - -```python -client.search( - "What was Uber's profit in 2020?", - { - "index_measure": "l2_distance", - "use_hybrid_search": True, - "hybrid_search_settings": { - "full_text_weight": 1.0, - "semantic_weight": 5.0, - "full_text_limit": 200, - "rrf_k": 50, - }, - "filters": {"title": {"$in": ["lyft_2021.pdf", "uber_2021.pdf"]}}, - "search_limit": 10, - "probes": 25, - "ef_search": 100, - }, -) -``` - - - -```javascript -await client.search("What was Uber's profit in 2020?", { - indexMeasure: "l2_distance", - useHybridSearch: true, - hybridSearchSettings: { - fullTextWeight: 1.0, - semanticWeight: 5.0, - fullTextLimit: 200, - rrfK: 50 - }, - filters: { title: { $in: ["lyft_2021.pdf", "uber_2021.pdf"] } }, - searchLimit: 10, - probes: 25, - efSearch: 100 -}); -``` - - - -```bash -curl -X POST http://localhost:7272/v2/search \ - -H "Content-Type: application/json" \ - -d '{ - "query": "What was Uber'\''s profit in 2020?", - "vector_search_settings": { - "index_measure": "l2_distance", - "use_hybrid_search": true, - "hybrid_search_settings": { - "full_text_weight": 1.0, - "semantic_weight": 5.0, - "full_text_limit": 200, - "rrf_k": 50 - }, - "filters": {"title": {"$in": ["lyft_2021.pdf", "uber_2021.pdf"]}}, - "search_limit": 10, - "probes": 25, - "ef_search": 100 - } - }' -``` - - - - -### Knowledge Graph Search - -R2R integrates knowledge graph capabilities to enhance search results with structured relationships. Knowledge graph search can be configured to focus on specific entity types, relationships, or search levels. Here's how to utilize knowledge graph search: - - -Knowledge Graphs are not constructed by default, refer to the [cookbook here](/cookbooks/graphrag) before attempting to run the command below! - - - - -```python -r2r search --query="Who founded Airbnb?" --use-kg-search --kg-search-type=local -``` - - - -```python -client.search("Who founded Airbnb?", kg_search_settings={ - "use_kg_search": True, - "kg_search_type": "local", - "kg_search_level": 0, # level of community to search - "max_community_description_length": 65536, - "max_llm_queries_for_global_search": 250, - "local_search_limits": { - "__Entity__": 20, - "__Relationship__": 20, - "__Community__": 20 - } -}) -``` - - - -```javascript -await client.search("Who founded Airbnb?", true, {}, 10, false, {}, { - useKgSearch: true, - kgSearchType: "local", - kgSearchLevel: "0", - maxCommunityDescriptionLength: 65536, - maxLlmQueriesForGlobalSearch: 250, - localSearchLimits: { - __Entity__: 20, - __Relationship__: 20, - __Community__: 20 - } -}); -``` - - - -```bash -curl -X POST http://localhost:7272/v2/search \ - -H "Content-Type: application/json" \ - -d '{ - "query": "Who founded Airbnb?", - "kg_search_settings": { - "use_kg_search": true, - "kg_search_type": "local", - "kg_search_level": "0", - "max_community_description_length": 65536, - "max_llm_queries_for_global_search": 250, - "local_search_limits": { - "__Entity__": 20, - "__Relationship__": 20, - "__Community__": 20 - } - } - }' -``` - - - -Key configurable parameters for knowledge graph search include: - -- `use_kg_search`: Enable knowledge graph search. -- `kg_search_type`: "local" -- `kg_search_level`: Specify the level of community to search. -- `entity_types`: List of entity types to include in the search. -- `relationships`: List of relationship types to include in the search. -- `max_community_description_length`: Maximum length of community descriptions. 
-- `max_llm_queries_for_global_search`: Limit on the number of LLM queries for global search. -- `local_search_limits`: Set limits for different types of local searches. - -Knowledge graph search provides structured information about entities and their relationships, complementing the text-based search results and offering a more comprehensive understanding of the data. - -R2R's search functionality is highly flexible and can be tailored to specific use cases. By adjusting these parameters, you can optimize the search process for accuracy, speed, or a balance between the two, depending on your application's needs. The combination of vector search, hybrid search, and knowledge graph capabilities allows for powerful and context-aware information retrieval, enhancing the overall performance of your RAG applications. - -## Retrieval-Augmented Generation (RAG) - -R2R is built around a comprehensive Retrieval-Augmented Generation (RAG) engine, allowing you to generate contextually relevant responses based on your ingested documents. The RAG process combines all the search functionality shown above with Large Language Models to produce more accurate and informative answers. - - - - -To generate a response using RAG, use the following command: - - - - - -```bash -r2r rag --query="What was Uber's profit in 2020?" -``` - - - - -```python -client.rag(query="What was Uber's profit in 2020?") -``` - - - -```javascript -await client.rag({ query: "What was Uber's profit in 2020?" }); -``` - - - -```bash -curl -X POST http://localhost:7272/v2/rag \ - -H "Content-Type: application/json" \ - -d '{ - "query": "What was Uber'\''s profit in 2020?" - }' -``` - - - - - -**Example Output:** - -```bash -{'results': [ - ChatCompletion( - id='chatcmpl-9RCB5xUbDuI1f0vPw3RUO7BWQImBN', - choices=[ - Choice( - finish_reason='stop', - index=0, - logprobs=None, - message=ChatCompletionMessage( - content="Uber's profit in 2020 was a net loss of $6,768 million [10].", - role='assistant', - function_call=None, - tool_calls=None) - ) - ], - created=1716268695, - model='gpt-4o-mini', - object='chat.completion', - system_fingerprint=None, - usage=CompletionUsage(completion_tokens=20, prompt_tokens=1470, total_tokens=1490) - ) -]} -``` - -This command performs a search on the ingested documents and uses the retrieved information to generate a response. - - -R2R also supports hybrid search in RAG, combining the power of vector search and keyword-based search. To use hybrid search in RAG, simply add the `use_hybrid_search` flag to your search settings input: - - - -```bash -r2r rag --query="Who is Jon Snow?" 
--use-hybrid-search -``` - - - - -```javascript -results = client.rag("Who is Jon Snow?", {"use_hybrid_search": True}) -``` - - - -```javascript -await client.rag({ - query: "Who is Jon Snow?", -}); -``` - - - -```bash -curl -X POST http://localhost:7272/v2/rag \ - -H "Content-Type: application/json" \ - -d '{ - "query": "Who is Jon Snow?", - "vector_search_settings": { - "use_vector_search": true, - "filters": {}, - "search_limit": 10, - "use_hybrid_search": true - } - }' -``` - - - - - -**Example Output:** - -```bash -{'results': [ - ChatCompletion( - id='chatcmpl-9cbRra4MNQGEQb3BDiFujvDXIehud', - choices=[ - Choice( - finish_reason='stop', - index=0, - logprobs=None, - message=ChatCompletionMessage( - content="Jon Snow is mentioned in the context as one of Samwell (Sam) Tarly's closest companions at the Wall [5], [6].", - role='assistant', - function_call=None, - tool_calls=None) - ) - ], - created=1718987443, - model='openai/gpt-4o-2024-05-13', - object='chat.completion', - system_fingerprint=None, - usage=CompletionUsage(completion_tokens=20, prompt_tokens=1192, total_tokens=1221) - ) -]} -``` - - -This example demonstrates how hybrid search can enhance the RAG process by combining semantic understanding with keyword matching, potentially providing more accurate and comprehensive results. - - - -R2R also supports streaming RAG responses, which can be useful for real-time applications. To use streaming RAG: - - - -```bash -r2r rag --query="who was aristotle" --use-hybrid-search --stream -``` - - - - -```python -response = client.rag( - "who was aristotle", - rag_generation_config={"stream": True}, - vector_search_settings={"use_hybrid_search": True}, -) -for chunk in response: - print(chunk, end='', flush=True) -``` - - - -```javascript -await client.rag({ - query: query, - rag_generation_config: { - stream: true, - } -}); -``` - - - - - -**Example Output:** - -```bash -["{\"id\":\"808c47c5-ebef-504a-a230-aa9ddcfbd87 .... -Aristotle was an Ancient Greek philosopher and polymath born in 384 BC in Stagira, Chalcidice [1], [4]. He was a student of Plato and later became the tutor of Alexander the Great [2]. Aristotle founded the Peripatetic school of philosophy in the Lyceum in Athens and made significant contributions across a broad range of subjects, including natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts [4]. His work laid the groundwork for the development of modern science [4]. Aristotle's influence extended well beyond his time, impacting medieval Islamic and Christian scholars, and his contributions to logic, ethics, and biology were particularly notable [8], [9], [10].``` -``` - -Streaming allows the response to be generated and sent in real-time, chunk by chunk. - - - R2R offers extensive customization options for its Retrieval-Augmented Generation (RAG) functionality: - - 1. **Search Settings**: Customize vector and knowledge graph search parameters using `VectorSearchSettings` and `KGSearchSettings`. - - 2. **Generation Config**: Fine-tune the language model's behavior with `GenerationConfig`, including: - - Temperature, top_p, top_k for controlling randomness - - Max tokens, model selection, and streaming options - - Advanced settings like beam search and sampling strategies - - 3. 
**Multiple LLM Support**: Easily switch between different language models and providers: - - OpenAI models (default) - - Anthropic's Claude models - - Local models via Ollama - - Any provider supported by LiteLLM - - Example of customizing the model: - - - - -```bash -r2r rag --query="who was aristotle?" --rag-model="anthropic/claude-3-haiku-20240307" --stream --use-hybrid-search -``` - - - - - ```python - # requires ANTHROPIC_API_KEY is set - response = client.rag( - "Who was Aristotle?", - rag_generation_config={"model":"anthropic/claude-3-haiku-20240307", "stream": True} - ) - for chunk in response: - print(chunk, nl=False) - ``` - - - -```javascript -await client.rag({ - query: query, - rag_generation_config: { - model: 'claude-3-haiku-20240307', - temperature: 0.7, - } -}); -``` - - - - -```bash -# requires ANTHROPIC_API_KEY is set -curl -X POST http://localhost:7272/v2/rag \ - -H "Content-Type: application/json" \ - -d '{ - "query": "Who is Jon Snow?", - "rag_generation_config": { - "model": "claude-3-haiku-20240307", - "temperature": 0.7 - } - }' -``` - - - - - - - This flexibility allows you to optimize RAG performance for your specific use case and leverage the strengths of various LLM providers. - - - -Behind the scenes, R2R's RetrievalService handles RAG requests, combining the power of vector search, optional knowledge graph integration, and language model generation. The flexible architecture allows for easy customization and extension of the RAG pipeline to meet diverse requirements. - - -## User Auth - -R2R provides robust user auth and management capabilities. This section briefly covers user authentication features and how they relate to document management. - - - - -To register a new user: - - - -```python -from r2r import R2RClient - -client = R2RClient("http://localhost:7272") -register_response = client.register("test@example.com", "password123") -print(f"Registration response: {register_response}") -``` - - - -```bash -curl -X POST http://localhost:7272/v2/register \ - -H "Content-Type: application/json" \ - -d '{ - "email": "test@example.com", - "password": "password123" - }' -``` - - - -```javascript -await client.register("test@gmail.com", "password123") -``` - - - -Example output: - -```bash -{ - 'results': { - 'email': 'test@example.com', - 'id': '60af344f-7bd2-43c9-98fd-da53fe5e6d05', - 'is_superuser': False, - 'is_active': True, - 'is_verified': False, - 'verification_code_expiry': None, - 'name': None, - 'bio': None, - 'profile_picture': None, - 'created_at': '2024-07-16T21:50:57.017675Z', 'updated_at': '2024-07-16T21:50:57.017675Z' - } -} -``` - - - - - -After registration, users need to verify their email: - - - -```python -verify_response = client.verify_email("123456") # Verification code sent to email -print(f"Email verification response: {verify_response}") -``` - - - -```bash -curl -X POST http://localhost:7272/v2/verify_email/123456 -``` - - - -```javascript -await client.verifyEmail("123456") -``` - - - - - -To log in and obtain access tokens: - - - -```python -login_response = client.login("test@example.com", "password123") -print(f"Login response: {login_response}") -``` - - - -```bash -curl -X POST http://localhost:7272/v2/login \ - -H "Content-Type: application/x-www-form-urlencoded" \ - -d "username=test@example.com&password=password123" -``` - - - -```javascript -await client.login("test@example.com", "password123") -``` - - - - - - -```bash -# Note, verification is False in default settings -Registration response: { - 'results': { - 
'access_token': { - 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0cXFAZXhhbXBsZS5jb20iLCJleHAiOjE3MjExOTU3NDQuNzQ1MTM0LCJ0b2tlbl90eXBlIjoiYWNjZXNzIn0.-HrQlguPW4EmPupOYyn5793luaDb-YhEpEsIyQ2CbLs', - 'token_type': 'access' - }, - 'refresh_token': { - 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0cXFAZXhhbXBsZS5jb20iLCJleHAiOjE3MjE3NzE3NDQsInRva2VuX3R5cGUiOiJyZWZyZXNoIn0.auuux_0Gg6_b5gTlUOQVCcdPuZl0eM-NFlC1OHdBqiE', - 'token_type': 'refresh' - } - } -} -``` - - - -To retrieve information about the currently authenticated user: - - - -```python -# requires client.login(...) -user_response = client.user()["results"] -print(f"Current user: {user_response}") -``` - - - -```bash -curl -X GET http://localhost:7272/v2/user \ - -H "Authorization: Bearer YOUR_ACCESS_TOKEN" -``` - - - -```javascript -await client.usersOverview() -``` - - - -```bash -# Note, verification is False in default settings -Current user: { - 'results': { - 'access_token': { - 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0cXFAZXhhbXBsZS5jb20iLCJleHAiOjE3MjExOTU3NDQuNzQ1MTM0LCJ0b2tlbl90eXBlIjoiYWNjZXNzIn0.-HrQlguPW4EmPupOYyn5793luaDb-YhEpEsIyQ2CbLs', - 'token_type': 'access' - }, - 'refresh_token': { - 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0cXFAZXhhbXBsZS5jb20iLCJleHAiOjE3MjE3NzE3NDQsInRva2VuX3R5cGUiOiJyZWZyZXNoIn0.auuux_0Gg6_b5gTlUOQVCcdPuZl0eM-NFlC1OHdBqiE', - 'token_type': 'refresh' - } - } -} -``` - - - - - -Once authenticated, search results are automatically filtered to include only documents associated with the current user: - - - -```python -# requires client.login(...) -search_response = client.search(query="Who was Aristotle")["results"] -print(f"Search results: {search_response}") -``` - - - -```bash -curl -X POST http://localhost:7272/v2/search \ - -H "Authorization: Bearer YOUR_ACCESS_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "query": "Who was Aristotle" - }' -``` - - - -```javascript -await client.search("Who was Aristotle") -``` - - -```bash -# search results are empty for a new user -Search results: {'vector_search_results': [], 'kg_search_results': []} -``` - - - - -To refresh an expired access token: - - - -```python -# requires client.login(...) -refresh_response = client.refresh_access_token()["results"] -print(f"Token refresh response: {refresh_response}") -``` - - - -```bash -curl -X POST http://localhost:7272/v2/refresh_access_token \ - -H "Authorization: Bearer YOUR_REFRESH_TOKEN" \ - -H "Content-Type: application/json" \ - -d '{ - "refresh_token": "YOUR_REFRESH_TOKEN" - }' -``` - - - -```javascript -await client.refreshAccessToken() -``` - - -```bash -Token refresh response: -{ - 'access_token': { - 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0cXFAZXhhbXBsZS5jb20iLCJleHAiOjE3MjExOTU5NTYuODEzNDg0LCJ0b2tlbl90eXBlIjoiYWNjZXNzIn0.-CJy_cH7DRH5FKpZZauAFPP4mncnSa1j8NnaM7utGHo', - 'token_type': 'access' - }, - 'refresh_token': { - 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0cXFAZXhhbXBsZS5jb20iLCJleHAiOjE3MjE3NzE5NTYsInRva2VuX3R5cGUiOiJyZWZyZXNoIn0.uGsgTYaUd3Mn5h24uE4ydCWhOr2vFNA9ziRAAaYgnfk', - 'token_type': 'refresh' - } -} -``` - - - -To log out and invalidate the current access token: - - - -```python -# requires client.login(...) 
-logout_response = client.logout() -print(f"Logout response: {logout_response}") -``` - - - -```bash -curl -X POST http://localhost:7272/v2/logout \ - -H "Authorization: Bearer YOUR_ACCESS_TOKEN" -``` - - - -```javascript -await client.logout() -``` - - -```bash -{ - 'results': {'message': 'Logged out successfully'} -} -``` - - - - - -These authentication features ensure that users can only access and manage their own documents. When performing operations like search, RAG, or document management, the results are automatically filtered based on the authenticated user's permissions. - -Remember to replace `YOUR_ACCESS_TOKEN` and `YOUR_REFRESH_TOKEN` with actual tokens obtained during the login process. - -## Observability and Analytics - -R2R provides robust observability and analytics features, allowing superusers to monitor system performance, track usage patterns, and gain insights into the RAG application's behavior. These advanced features are crucial for maintaining and optimizing your R2R deployment. - - -Observability and analytics features are restricted to superusers only. By default, R2R is configured to treat unauthenticated users as superusers for quick testing and development. In a production environment, you should disable this setting and properly manage superuser access. - - - - - - - -R2R offers high level user observability for superusers - - - - -```bash -r2r users-overview -``` - - - - -```python -client.users_overview() -``` - - - -```javascript -await client.users_overview() -``` - - - - -```bash -curl -X POST http://localhost:7272/v2/users_overview \ - -H "Content-Type: application/json" \ - -d '{}' -``` - - - - - - -This command returns detailed log user information, here's some example output: - -```bash -{'results': [{'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'num_files': 9, 'total_size_in_bytes': 4027056, 'document_ids': ['9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'e0fc8bbc-95be-5a98-891f-c17a43fa2c3d', 'cafdf784-a1dc-5103-8098-5b0a97db1707', 'b21a46a4-2906-5550-9529-087697da2944', '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'f17eac52-a22e-5c75-af8f-0b25b82d43f8', '022fdff4-f87d-5b0c-82e4-95d53bcc4e60', 'c5b31b3a-06d2-553e-ac3e-47c56139b484', 'e0c2de57-171d-5385-8081-b546a2c63ce3']}, ...]}} -``` - - -This summary returns information for each user about their number of files ingested, the total size of user ingested files, and the corresponding document ids. - - - -R2R automatically logs various events and metrics during its operation. You can access these logs using the `logs` command: - - - - -```bash -r2r logs -``` - - - -```python -client.logs() -``` - - - -```javascript -await client.logs() -``` - - - - -```bash -curl -X POST http://localhost:7272/v2/logs \ - -H "Content-Type: application/json" \ - -d '{ - "log_type_filter": null, - "max_runs_requested": 100 - }' -``` - - - - - - -This command returns detailed log entries for various operations, including search and RAG requests. 
Here's an example of a log entry: - -```python -{ - 'run_id': UUID('27f124ad-6f70-4641-89ab-f346dc9d1c2f'), - 'run_type': 'rag', - 'entries': [ - {'key': 'search_results', 'value': '["{\\"id\\":\\"7ed3a01c-88dc-5a58-a68b-6e5d9f292df2\\",...}"]'}, - {'key': 'search_query', 'value': 'Who is aristotle?'}, - {'key': 'rag_generation_latency', 'value': '3.79'}, - {'key': 'llm_response', 'value': 'Aristotle (Greek: Ἀριστοτέλης Aristotélēs; 384–322 BC) was...'} - ] -} -``` - -These logs provide detailed information about each operation, including search results, queries, latencies, and LLM responses. - - - -R2R offers an analytics feature that allows you to aggregate and analyze log data. You can use the `analytics` command to retrieve various statistics: - - - - - -```python -client.analytics( - {"search_latencies": "search_latency"}, - {"search_latencies": ["basic_statistics", "search_latency"]} -) -``` - - - -```javascript -const filterCriteria = { - filters: { - search_latencies: "search_latency", - }, - }; - - const analysisTypes = { - search_latencies: ["basic_statistics", "search_latency"], - }; - - await client.analytics(filterCriteria, analysisTypes); -``` - - - - -```bash -curl -X POST http://localhost:7272/v2/analytics \ - -H "Content-Type: application/json" \ - -d '{ - "filter_criteria": { - "filters": { - "search_latencies": "search_latency" - } - }, - "analysis_types": - { - "analysis_types": { - "search_latencies": ["basic_statistics", "search_latency"] - } - } - }' -``` - - - - - - -This command returns aggregated statistics based on the specified filters and analysis types. Here's an example output: - -```python -{ - 'results': { - 'filtered_logs': { - 'search_latencies': [ - { - 'timestamp': '2024-06-20 21:29:06', - 'log_id': UUID('0f28063c-8b87-4934-90dc-4cd84dda5f5c'), - 'key': 'search_latency', - 'value': '0.66', - 'rn': 3 - } - ] - }, - 'search_latencies': { - 'Mean': 0.66, - 'Median': 0.66, - 'Mode': 0.66, - 'Standard Deviation': 0, - 'Variance': 0 - } - } -} -``` - -This analytics feature allows you to: -1. Filter logs based on specific criteria -2. Perform statistical analysis on various metrics (e.g., search latencies) -3. Track performance trends over time -4. Identify potential bottlenecks or areas for optimization - - - -R2R's analytics system is flexible and allows for custom analysis. You can specify different filters and analysis types to focus on specific aspects of your application's performance. For example: - -- Analyze RAG latencies -- Track usage patterns by user or document type -- Monitor error rates and types -- Assess the effectiveness of different LLM models or configurations - -To perform custom analytics, modify the `filters` and `analysis_types` parameters in the `analytics` command to suit your specific needs. - - - - -These observability and analytics features provide valuable insights into your R2R application's performance and usage, enabling data-driven optimization and decision-making. diff --git a/docs/cookbooks/web-dev.mdx b/docs/cookbooks/web-dev.mdx deleted file mode 100644 index 2647f5cac..000000000 --- a/docs/cookbooks/web-dev.mdx +++ /dev/null @@ -1,315 +0,0 @@ ---- -title: 'Web Development' -description: 'Learn how to build webapps powered by RAG using R2R' -icon: 'window' ---- - -Web developers can easily integrate R2R into their projects using the [R2R JavaScript client](https://github.com/SciPhi-AI/r2r-js). 
-For more extensive reference and examples of how to use the r2r-js library, we encourage you to look at the [R2R Application](/cookbooks/application) and its source code. - -## Hello R2R—JavaScript - -R2R gives developers configurable vector search and RAG right out of the box, as well as direct method calls instead of the client-server architecture seen throughout the docs: -```python r2r-js/examples/hello_r2r.js - -const { r2rClient } = require("r2r-js"); - -const client = new r2rClient("http://localhost:7272"); - -async function main() { - const files = [ - { path: "examples/data/raskolnikov.txt", name: "raskolnikov.txt" }, - ]; - - const EMAIL = "admin@example.com"; - const PASSWORD = "change_me_immediately"; - console.log("Logging in..."); - await client.login(EMAIL, PASSWORD); - - console.log("Ingesting file..."); - const ingestResult = await client.ingestFiles(files, { - metadatas: [{ title: "raskolnikov.txt" }], - skip_document_info: false, - }); - console.log("Ingest result:", JSON.stringify(ingestResult, null, 2)); - - console.log("Performing RAG..."); - const ragResponse = await client.rag({ - query: "What does the file talk about?", - rag_generation_config: { - model: "openai/gpt-4o", - temperature: 0.0, - stream: false, - }, - }); - - console.log("Search Results:"); - ragResponse.results.search_results.vector_search_results.forEach( - (result, index) => { - console.log(`\nResult ${index + 1}:`); - console.log(`Text: ${result.metadata.text.substring(0, 100)}...`); - console.log(`Score: ${result.score}`); - }, - ); - - console.log("\nCompletion:"); - console.log(ragResponse.results.completion.choices[0].message.content); -} - -main(); -``` - -## r2r-js Client -### Installing - -To get started, install the R2R JavaScript client with [npm](https://www.npmjs.com/package/r2r-js): - - - -```bash -npm install r2r-js -``` - - - -### Creating the Client -First, we create the R2R client and specify the base URL where the R2R server is running: - -```javascript -const { r2rClient } = require("r2r-js"); - -// http://localhost:7272 or the address that you are running the R2R server -const client = new r2rClient("http://localhost:7272"); -``` - -### Log into the server -Sign into the server to authenticate the session. We'll use the default superuser credentials: - -```javascript -const EMAIL = "admin@example.com"; -const PASSWORD = "change_me_immediately"; -console.log("Logging in..."); -await client.login(EMAIL, PASSWORD); -``` - -### Ingesting Files -Specify the files that we'll ingest: - -```javascript -const files = [ - { path: "examples/data/raskolnikov.txt", name: "raskolnikov.txt" }, -]; -console.log("Ingesting file..."); - const ingestResult = await client.ingestFiles(files, { - metadatas: [{ title: "raskolnikov.txt" }], - skip_document_info: false, - }); -console.log("Ingest result:", JSON.stringify(ingestResult, null, 2)); -... -/* Ingest result: { - "results": { - "processed_documents": [ - "Document 'raskolnikov.txt' processed successfully." - ], - "failed_documents": [], - "skipped_documents": [] - } -} */ -``` - -This command processes the ingested, splits them into chunks, embeds the chunks, and stores them into your specified Postgres database. Relational data is also stored to allow for downstream document management, which you can read about in the [quickstart](https://r2r-docs.sciphi.ai/quickstart). 
- -### Performing RAG -We'll make a RAG request, - -```javascript -console.log("Performing RAG..."); - const ragResponse = await client.rag({ - query: "What does the file talk about?", - rag_generation_config: { - model: "openai/gpt-4o", - temperature: 0.0, - stream: false, - }, - }); - -console.log("Search Results:"); - ragResponse.results.search_results.vector_search_results.forEach( - (result, index) => { - console.log(`\nResult ${index + 1}:`); - console.log(`Text: ${result.metadata.text.substring(0, 100)}...`); - console.log(`Score: ${result.score}`); - }, - ); - - console.log("\nCompletion:"); - console.log(ragResponse.results.completion.choices[0].message.content); -... -/* Performing RAG... -Search Results: - -Result 1: -Text: praeterire culinam eius, cuius ianua semper aperta erat, cogebatur. Et quoties praeteribat, -iuvenis ... -Score: 0.08281802143835804 - -Result 2: -Text: In vespera praecipue calida ineunte Iulio iuvenis e cenaculo in quo hospitabatur in -S. loco exiit et... -Score: 0.052743945852283036 - -Completion: -The file discusses the experiences and emotions of a young man who is staying in a small room in a tall house. -He is burdened by debt and feels anxious and ashamed whenever he passes by the kitchen of his landlady, whose -door is always open [1]. On a particularly warm evening in early July, he leaves his room and walks slowly towards -a bridge, trying to avoid encountering his landlady on the stairs. His room, which is more like a closet than a -proper room, is located under the roof of the five-story house, while the landlady lives on the floor below and -provides him with meals and services [2]. -*/ -``` - -## Connecting to a Web App -R2R can be easily integrated into web applications. We'll create a simple Next.js app that uses R2R for query answering. [We've created a template repository with this code.](https://github.com/SciPhi-AI/r2r-webdev-template) - -Alternatively, you can add the code below to your own Next.js project. - -![R2R Dashboard Overview](/images/r2r_webdev_template.png) - -### Setting up an API Route - -First, we'll create an API route to handle R2R queries. Create a file named `r2r-query.ts` in the `pages/api` directory: - - -```typescript -import { NextApiRequest, NextApiResponse } from 'next'; -import { r2rClient } from 'r2r-js'; - -const client = new r2rClient("http://localhost:7272"); - -export default async function handler(req: NextApiRequest, res: NextApiResponse) { - if (req.method === 'POST') { - const { query } = req.body; - - try { - // Login with each request. In a production app, you'd want to manage sessions. - await client.login("admin@example.com", "change_me_immediately"); - - const response = await client.rag({ - query: query, - rag_generation_config: { - model: "openai/gpt-4o", - temperature: 0.0, - stream: false, - } - }); - - res.status(200).json({ result: response.results.completion.choices[0].message.content }); - } catch (error) { - res.status(500).json({ error: error instanceof Error ? error.message : 'An error occurred' }); - } - } else { - res.setHeader('Allow', ['POST']); - res.status(405).end(`Method ${req.method} Not Allowed`); - } -} -``` - - - -This API route creates an R2R client, logs in, and processes the incoming query using the RAG method. - -### Frontend: React Component - -Next, create a React component to interact with the API. 
Here's an example `index.tsx` file: - - -```tsx -import React, { useState } from 'react'; -import styles from '@/styles/R2RWebDevTemplate.module.css'; - -const R2RQueryApp: React.FC = () => { - const [query, setQuery] = useState(''); - const [result, setResult] = useState(''); - const [isLoading, setIsLoading] = useState(false); - - const performQuery = async () => { - setIsLoading(true); - setResult(''); - - try { - const response = await fetch('/api/r2r-query', { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - }, - body: JSON.stringify({ query }), - }); - - if (!response.ok) { - throw new Error('Network response was not ok'); - } - - const data = await response.json(); - setResult(data.result); - } catch (error) { - setResult(`Error: ${error instanceof Error ? error.message : String(error)}`); - } finally { - setIsLoading(false); - } - }; - - return ( -
-    <div>
-      <h1>R2R Web Dev Template</h1>
-      <p>
-        A simple template for making RAG queries with R2R.
-        Make sure that your R2R server is up and running, and that you've ingested files!
-      </p>
-      <p>
-        Check out the R2R Documentation for more information.
-      </p>
-      <input
-        type="text"
-        value={query}
-        onChange={(e) => setQuery(e.target.value)}
-        placeholder="Enter your query here"
-        className={styles.queryInput}
-      />
-      <button onClick={performQuery} disabled={isLoading}>
-        Submit Query
-      </button>
-      {isLoading ? (
-        <p>Loading...</p>
-      ) : (
-        <pre>{result}</pre>
-      )}
-    </div>
- ); -}; - -export default R2RQueryApp; -``` - - - -This component creates a simple interface with an input field for the query and a button to submit it. When the button is clicked, it sends a request to the API route we created earlier and displays the result. - -### Template Repository - -For a complete working example, you can check out our template repository. This repository contains a simple Next.js app with R2R integration, providing a starting point for your own R2R-powered web applications. - -For more advanced examples, check out the [source code for the R2R Dashboard.](https://github.com/SciPhi-AI/R2R-Application) - -[R2R Web App Template Repository](https://github.com/SciPhi-AI/r2r-webdev-template) - -To use this template: - -1. Clone the repository -2. Install dependencies with `pnpm install` -3. Make sure your R2R server is running -4. Start the development server with `pnpm dev` - -This template provides a foundation for building more complex applications with R2R, demonstrating how to integrate R2R's powerful RAG capabilities into a web interface. diff --git a/docs/development.mdx b/docs/development.mdx deleted file mode 100644 index 35b8c6661..000000000 --- a/docs/development.mdx +++ /dev/null @@ -1,96 +0,0 @@ ---- -title: 'Development' -description: 'Learn how to preview changes locally' ---- - - - **Prerequisite** You should have installed Node.js (version 18.10.0 or - higher). - - -Step 1. Install Mintlify on your OS: - - - -```bash npm -npm i -g mintlify -``` - -```bash yarn -yarn global add mintlify -``` - - - -Step 2. Go to the docs are located (where you can find `mint.json`) and run the following command: - -```bash -mintlify dev -``` - -The documentation website is now available at `http://localhost:3000`. - -### Custom Ports - -Mintlify uses port 3000 by default. You can use the `--port` flag to customize the port Mintlify runs on. For example, use this command to run in port 3333: - -```bash -mintlify dev --port 3333 -``` - -You will see an error like this if you try to run Mintlify in a port that's already taken: - -```md -Error: listen EADDRINUSE: address already in use :::3000 -``` - -## Mintlify Versions - -Each CLI is linked to a specific version of Mintlify. Please update the CLI if your local website looks different than production. - - - -```bash npm -npm i -g mintlify@latest -``` - -```bash yarn -yarn global upgrade mintlify -``` - - - -## Deployment - - - Unlimited editors available under the [Startup - Plan](https://mintlify.com/pricing) - - -You should see the following if the deploy successfully went through: - - - - - -## Troubleshooting - -Here's how to solve some common problems when working with the CLI. - - - - Update to Node v18. Run `mintlify install` and try again. - - -Go to the `C:/Users/Username/.mintlify/` directory and remove the `mint` -folder. Then Open the Git Bash in this location and run `git clone -https://github.com/mintlify/mint.git`. - -Repeat step 3. - - - - Try navigating to the root of your device and delete the ~/.mintlify folder. - Then run `mintlify dev` again. - - diff --git a/docs/documentation/api-reference.mdx b/docs/documentation/api-reference.mdx deleted file mode 100644 index e3d89e123..000000000 --- a/docs/documentation/api-reference.mdx +++ /dev/null @@ -1,6 +0,0 @@ ---- -title: 'API Reference' -description: 'Build, scale, and manage user-facing Retrieval-Augmented Generation applications.' 
-icon: 'message-code' -redirect: api-reference/introduction ---- diff --git a/docs/documentation/cli/graph.mdx b/docs/documentation/cli/graph.mdx deleted file mode 100644 index b12666a6c..000000000 --- a/docs/documentation/cli/graph.mdx +++ /dev/null @@ -1,153 +0,0 @@ ---- -title: 'Knowledge Graph' -description: 'Managing the knowledge graph with the R2R CLI.' ---- -## Knowledge Graph Management - -### Create Graph - -Create a knowledge graph for a collection using the `create-graph` command: - -````bash -r2r create-graph --collection-id my-collection --run --kg-creation-settings '{"key": "value"}' --force-kg-creation -```` - - - - - Collection ID to create graph for. Default is "". - - - - Run the graph creation process. - - - - Settings for the graph creation process as a JSON string. - - - - Force the graph creation process. - - - - -### Deduplicate Entities - -Deduplicate entities in a collection using the `deduplicate-entities` command: - -````bash -r2r deduplicate-entities --collection-id my-collection --run --deduplication-settings '{"key": "value"}' -```` - - - -### Enrich Graph - -Enrich an existing knowledge graph using the `enrich-graph` command: - -````bash -r2r enrich-graph --collection-id my-collection --run --force-kg-enrichment --kg-enrichment-settings '{"key": "value"}' -```` - - - - - Collection ID to enrich graph for. Default is "". - - - - Run the graph enrichment process. - - - - Force the graph enrichment process. - - - - Settings for the graph enrichment process as a JSON string. - - - - -### Get Entities - -Retrieve entities from the knowledge graph using the `get-entities` command: - -````bash -r2r get-entities --collection-id my-collection --offset 0 --limit 10 --entity-ids entity1 entity2 -```` - - - - - Collection ID to retrieve entities from. - - - - Offset for pagination. Default is 0. - - - - Limit for pagination. Default is 100. - - - - Entity IDs to filter by. - - - - -### Get Triples - -Retrieve triples from the knowledge graph using the `get-triples` command: - -````bash -r2r get-triples --collection-id my-collection --offset 0 --limit 10 --triple-ids triple1 triple2 --entity-names entity1 entity2 -```` - - - - - Collection ID to retrieve triples from. - - - - Offset for pagination. Default is 0. - - - - Limit for pagination. Default is 100. - - - - Triple IDs to filter by. - - - - Entity names to filter by. - - - - -### Delete Graph - -Delete the graph for a collection using the `delete-graph` command: - -````bash -r2r delete-graph --collection-id my-collection --cascade -```` - - - - - Collection ID to delete the graph for. - - - - Whether to cascade the deletion. - - NOTE: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection. - - - diff --git a/docs/documentation/cli/ingestion.mdx b/docs/documentation/cli/ingestion.mdx deleted file mode 100644 index 8adadf704..000000000 --- a/docs/documentation/cli/ingestion.mdx +++ /dev/null @@ -1,230 +0,0 @@ ---- -title: 'Ingestion' -description: 'Ingesting files and managing vector indices with the R2R CLI.' 
---- - -## Document Ingestion and Management - -### Ingest Files - -Ingest files or directories into your R2R system using the `ingest-files` command: - -```bash -r2r ingest-files path/to/file1.txt path/to/file2.txt \ - --document-ids 9fbe403b-c11c-5aae-8ade-ef22980c3ad1 \ - --metadatas '{"key1": "value1"}' -``` - - - - - The paths to the files to ingest. - - - - Optional document IDs to assign to the ingested files. If not provided, new document IDs will be generated. - - - - Optional metadata to attach to the ingested files, provided as a JSON string. If ingesting multiple files, the metadata will be applied to all files. - - - - -### Retry Failed Ingestions - -Retry ingestion for documents that previously failed using the `retry-ingest-files` command: - -```bash -r2r retry-ingest-files 9fbe403b-c11c-5aae-8ade-ef22980c3ad1 -``` - - - - - The IDs of the documents to retry ingestion for. - - - - -### Update Files - -Update existing documents using the `update-files` command: - -```bash -r2r update-files path/to/file1_v2.txt \ - --document-ids 9fbe403b-c11c-5aae-8ade-ef22980c3ad1 \ - --metadatas '{"key1": "value2"}' -``` - - - - - The paths to the updated files. - - - - The IDs of the documents to update, provided as a comma-separated string. - - - - Optional updated metadata to attach to the documents, provided as a JSON string. If updating multiple files, the metadata will be applied to all files. - - - - -## Vector Index Management -## Vector Index Management - -### Create Vector Index - -Create a new vector index for similarity search using the `create-vector-index` command: - -```bash -r2r create-vector-index \ - --table-name vectors \ - --index-method hnsw \ - --index-measure cosine_distance \ - --index-arguments '{"m": 16, "ef_construction": 64}' -``` - - - - - Table to create index on. Options: vectors, entities_document, entities_collection, communities. Default: vectors - - - - Indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw - - - - Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. Default: cosine_distance - - - - Configuration parameters as JSON string. For HNSW: `{"m": int, "ef_construction": int}`. For IVFFlat: `{"n_lists": int}` - - - - Optional custom name for the index. If not provided, one will be auto-generated - - - - Disable concurrent index creation. Default: False - - - - -#### Important Considerations - -Vector index creation requires careful planning and consideration of your data and performance requirements. Keep in mind: - -**Resource Intensive Process** -- Index creation can be CPU and memory intensive, especially for large datasets -- For HNSW indexes, memory usage scales with both dataset size and `m` parameter -- Consider creating indexes during off-peak hours for production systems - -**Performance Tuning** -1. 
**HNSW Parameters:** - - `m`: Higher values (16-64) improve search quality but increase memory usage and build time - - `ef_construction`: Higher values increase build time and quality but have diminishing returns past 100 - - Recommended starting point: `m=16`, `ef_construction=64` - -```bash -# Example balanced configuration -r2r create-vector-index \ - --table-name vectors \ - --index-method hnsw \ - --index-measure cosine_distance \ - --index-arguments '{"m": 16, "ef_construction": 64}' -``` - -**Pre-warming Required** -- **Important:** Newly created indexes require pre-warming to achieve optimal performance -- Initial queries may be slower until the index is loaded into memory -- The first several queries will automatically warm the index -- For production systems, consider implementing explicit pre-warming by running representative queries after index creation -- Without pre-warming, you may not see the expected performance improvements - -**Best Practices** -1. Always use concurrent index creation (avoid `--no-concurrent`) in production to prevent blocking other operations -2. Monitor system resources during index creation -3. Test index performance with representative queries before deploying -4. Consider creating indexes on smaller test datasets first to validate parameters -5. Implement index pre-warming strategy before handling production traffic - -**Distance Measures** -Choose the appropriate measure based on your use case: -- `cosine_distance`: Best for normalized vectors (most common) -- `l2_distance`: Better for absolute distances -- `max_inner_product`: Optimized for dot product similarity - -### List Vector Indices - -List existing vector indices using the `list-vector-indices` command: - -```bash -r2r list-vector-indices --table-name vectors -``` - - - - - Table to list indices from. Options: vectors, entities_document, entities_collection, communities. Default: vectors - - - - -### Delete Vector Index - -Delete a vector index using the `delete-vector-index` command: - -```bash -r2r delete-vector-index my-index-name --table-name vectors -``` - - - - - Name of the index to delete - - - - Table containing the index. Options: vectors, entities_document, entities_collection, communities. Default: vectors - - - - Disable concurrent index deletion. Default: False - - - - -## Sample File Management - -### Ingest Sample Files - -Ingest one or more sample files from the R2R GitHub repository: - -```bash -# Ingest a single sample file -r2r ingest-sample-file - -# Ingest a smaller version of the sample file -r2r ingest-sample-file --v2 - -# Ingest multiple sample files -r2r ingest-sample-files -``` - -These commands have no additional arguments. The `--v2` flag for `ingest-sample-file` ingests a smaller version of the sample Aristotle text file. - -### Ingest Local Sample Files - -Ingest the local sample files in the `core/examples/data_unstructured` directory: - -```bash -r2r ingest-sample-files-from-unstructured -``` - -This command has no additional arguments. It will ingest all files found in the `data_unstructured` directory. diff --git a/docs/documentation/cli/introduction.mdx b/docs/documentation/cli/introduction.mdx deleted file mode 100644 index 348bd68c9..000000000 --- a/docs/documentation/cli/introduction.mdx +++ /dev/null @@ -1,62 +0,0 @@ ---- -title: 'Introduction' -description: 'Build, scale, and manage user-facing Retrieval-Augmented Generation applications with the R2R CLI.' 
---- - -# R2R CLI Documentation - -## Installation - -Before starting, make sure you have completed the [R2R installation](/documentation/installation). - -The R2R CLI is automatically installed as part of the R2R Python package. No additional installation steps are required. - -## Getting Started - -1. Ensure R2R is running correctly by checking the health status: - -```bash -r2r health -``` - -You should see output similar to: -``` -{"status":"ok"} -``` - -2. Ingest a sample file to test the ingestion process: - -```bash -r2r ingest-sample-file -``` - -This will download and ingest a sample text file from the R2R GitHub repository. You can verify the ingestion was successful by checking the documents overview: - -```bash -r2r documents-overview -``` - -3. Perform a search on the ingested document: - -```bash -r2r search --query "What did Aristotle contribute to philosophy?" -``` - -This will search the ingested document for relevant information to answer the query. - -4. Generate a response using RAG (Retrieval-Augmented Generation): - -```bash -r2r rag --query "What were Aristotle's main contributions to logic?" -``` - -This will perform a search on the ingested document and use the retrieved information to generate a complete response to the query. - -## Additional Documentation - -For more detailed information on specific functionalities of the R2R CLI, please refer to the following documentation: - -- [Document Ingestion](/documentation/cli/ingestion): Learn how to add, retrieve, and manage documents using the CLI. -- [Search & RAG](/documentation/cli/retrieval): Explore various querying techniques and Retrieval-Augmented Generation capabilities. -- [Knowledge Graphs](/documentation/cli/graph): Learn how to create and enrich knowledge graphs, and perform GraphRAG. -- [Server Management](/documentation/cli/server): Manage your R2R server, including health checks, logs, and updates. diff --git a/docs/documentation/cli/maintenance.mdx b/docs/documentation/cli/maintenance.mdx deleted file mode 100644 index b393fbae1..000000000 --- a/docs/documentation/cli/maintenance.mdx +++ /dev/null @@ -1,220 +0,0 @@ ---- -title: 'Maintenance' -description: 'Managing R2R versions, updates, and database migrations across environments.' ---- - -## Deployment Management - -R2R deployments consist of three main components that need to be managed: -1. The R2R Python package -2. The Docker images -3. The database schema - -### Version Management - -Check your current R2R version: - -```bash -r2r version -``` - -### Update R2R - -Update your R2R installation to the latest version: - -```bash -r2r update -``` - -This command will: -- Upgrade the R2R package to the latest version using pip -- Display the update progress and confirmation -- Show any errors if they occur during the update process - - -When you update R2R, the Docker image used by `r2r serve` will automatically be updated to match the new version. The system will attempt to use a version-specific image (e.g., `ragtoriches/prod:1.2.3`) or fall back to `latest` if the specific version isn't available. - - -### Database Management - -R2R uses database migrations to manage schema changes across versions. After updating R2R, you should always check and update your database schema: - -### Check Current Migration - -View the current migration state of your database: - -```bash -r2r db current -``` - - - - - Schema name to check. Defaults to R2R_PROJECT_NAME environment variable. 
- - - - -### Apply Migrations - -Upgrade your database to the latest version: - -```bash -r2r db upgrade -``` - - - - - Schema name to upgrade. Defaults to R2R_PROJECT_NAME environment variable. - - - Specific revision to upgrade to. Defaults to "head" (latest version). - - - - -## Deployment Process - -Here's the recommended process for updating an R2R deployment: - -1. **Prepare for Update** - ```bash - # Check current versions - r2r version - r2r db current - - # Generate system report (optional) - r2r generate-report - ``` - -2. **Stop Running Services** - ```bash - # Bring down existing deployment - r2r docker-down - ``` - -3. **Update R2R** - ```bash - r2r update - ``` - -4. **Update Database** - ```bash - # Check and apply any new migrations - r2r db upgrade - ``` - -5. **Restart Services** - ```bash - # Start the server with your configuration - r2r serve --docker [additional options] - ``` - - - - - Host to run the server on. Default is "0.0.0.0". - - - Port to run the server on. Default comes from R2R_PORT or PORT env var, or 7272. - - - Run using Docker (recommended for production). - - - Run the full R2R compose with Hatchet and Unstructured. - - - Project name for Docker deployment. - - - Specific Docker image to use (optional). - - - Exclude creating a Postgres container. - - - - - - PostgreSQL host address. Default is "localhost". - - - PostgreSQL port. Default is "5432". - - - PostgreSQL database name. Default is "postgres". - - - PostgreSQL username. Default is "postgres". - - - Project name used for schema. Default is "r2r_default". - - - - -## Managing Multiple Environments - -For different environments (development, staging, production), use different project names and schemas: - -```bash -# Development -export R2R_PROJECT_NAME=r2r_dev -r2r serve --docker --project-name r2r-dev - -# Staging -export R2R_PROJECT_NAME=r2r_staging -r2r serve --docker --project-name r2r-staging - -# Production -export R2R_PROJECT_NAME=r2r_prod -r2r serve --docker --project-name r2r-prod -``` - -## Vector Index Management - -R2R uses vector indices to enable efficient similarity search across documents. For detailed information about managing vector indices, including creation, listing, and deletion, see the [Ingestion documentation](/documentation/cli/ingestion). - -Key vector index management commands: -```bash -# Create a new vector index -r2r create-vector-index - -# List existing indices -r2r list-vector-indices - -# Delete an index -r2r delete-vector-index -``` - - -## Troubleshooting - -If issues occur during deployment: - -1. Generate a system report: - ```bash - r2r generate-report - ``` - -2. Check container health: - ```bash - # Bring down existing deployment - r2r docker-down - - # Start fresh and watch for health checks - r2r serve --docker - ``` - -3. Review the database state: - ```bash - r2r db current - r2r db history - ``` - -4. If needed, roll back database changes: - ```bash - r2r db downgrade --revision - ``` diff --git a/docs/documentation/cli/management.mdx b/docs/documentation/cli/management.mdx deleted file mode 100644 index be11a251f..000000000 --- a/docs/documentation/cli/management.mdx +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: 'Management' -description: 'Managing your R2R system with the CLI.' ---- -## System Management - -### Retrieve Analytics - -Retrieve analytics data using the `analytics` command: - -```bash -r2r analytics --filters '{"key1": "value1"}' --analysis-types '{"type1": true}' -``` - - - - - Optional filters for analytics as a JSON string. 
- - - - Optional analysis types as a JSON string. - - - - -### Retrieve Application Settings - -Retrieve application settings using the `app-settings` command: - -```bash -r2r app-settings -``` - -This command has no additional arguments. - -### Get User Overview - -Get an overview of users using the `users-overview` command: - -```bash -r2r users-overview --user-ids user1 user2 --offset 0 --limit 10 -``` - - - - - Optional user IDs to overview. - - - - The offset to start from. Defaults to 0. - - - - The maximum number of nodes to return. Defaults to 100. - - - - -### Delete Documents - -Delete documents based on filters using the `delete` command: - -```bash -r2r delete -f key1:eq:value1 -f key2:gt:value2 -``` - - - - - Filters for deletion in the format key:operator:value. - - - - -### Get Document Overview - -Get an overview of documents using the `documents-overview` command: - -```bash -r2r documents-overview --document-ids doc1 doc2 --offset 0 --limit 10 -``` - - - - - Optional document IDs to overview. - - - - The offset to start from. Defaults to 0. - - - - The maximum number of nodes to return. Defaults to 100. - - - - -### Get Document Chunks - -Get chunks of a specific document using the `document-chunks` command: - -```bash -r2r document-chunks --document-id doc1 --offset 0 --limit 10 -``` - - - - - The ID of the document to retrieve chunks for. - - - - The offset to start from. Defaults to 0. - - - - The maximum number of nodes to return. Defaults to 100. - - - - An optional value to return the vectors associated with each chunk, defaults to `False`. - - - - diff --git a/docs/documentation/cli/retrieval.mdx b/docs/documentation/cli/retrieval.mdx deleted file mode 100644 index d551d3239..000000000 --- a/docs/documentation/cli/retrieval.mdx +++ /dev/null @@ -1,137 +0,0 @@ ---- -title: 'Retrieval' -description: 'Search and RAG capabilities using the R2R CLI.' ---- - -# Retrieval Operations - -The R2R CLI provides two main retrieval commands: `search` and `rag` (Retrieval-Augmented Generation). These commands allow you to query your document collection and generate AI-powered responses based on the retrieved content. - -## Search Command - -The `search` command performs document retrieval using vector search and/or knowledge graph search capabilities. 
- -```bash -r2r search --query "Your search query" -``` - -### Vector Search Options - -- `--use-vector-search`: Enable vector search (default: true) -- `--filters`: Apply JSON filters to the search results - ```bash - r2r search --filters '{"document_id":{"$in":["doc-id-1", "doc-id-2"]}}' - ``` -- `--search-limit`: Maximum number of search results to return -- `--use-hybrid-search`: Enable hybrid search combining vector and keyword search -- `--selected-collection-ids`: Specify collection IDs to search within as JSON array -- `--search-strategy`: Choose between "vanilla" search or advanced methods like query fusion or HyDE - -### Knowledge Graph Search Options - -- `--use-kg-search`: Enable knowledge graph search -- `--kg-search-type`: Choose between "local" or "global" search -- `--kg-search-level`: Specify the level for global KG search -- `--entity-types`: Filter by entity types (as JSON) -- `--relationships`: Filter by relationship types (as JSON) -- `--max-community-description-length`: Set maximum length for community descriptions -- `--local-search-limits`: Set limits for local search (as JSON) - -## RAG Command - -The `rag` command combines search capabilities with AI generation to provide contextual responses based on your document collection. - -```bash -r2r rag --query "Your question" -``` - -### Generation Options - -- `--stream`: Stream the response in real-time -- `--rag-model`: Specify the model to use for generation - -### Vector Search Settings - -- `--use-vector-search`: Enable vector search (default: true) -- `--filters`: Apply JSON filters to search results -- `--search-limit`: Maximum number of search results (default: 10) -- `--use-hybrid-search`: Enable hybrid search -- `--selected-collection-ids`: Specify collection IDs to search within -- `--search-strategy`: Choose search method (default: "vanilla") - -### Knowledge Graph Settings - -- `--use-kg-search`: Enable knowledge graph search -- `--kg-search-type`: Set to "local" or "global" (default: "local") -- `--kg-search-level`: Specify cluster level for Global KG search -- `--kg-search-model`: Choose the model for KG agent -- `--entity-types`: Filter by entity types (as JSON) -- `--relationships`: Filter by relationship types (as JSON) -- `--max-community-description-length`: Set maximum community description length -- `--local-search-limits`: Set limits for local search (as JSON) - -## Examples - -### Basic Search - -```bash -# Simple vector search -r2r search --query "What is quantum computing?" - -# Search with filters -r2r search --query "quantum computing" --filters '{"category": "physics"}' -``` - -### Advanced Search - -```bash -# Hybrid search with collection filtering -r2r search --query "quantum computing" \ - --use-hybrid-search \ - --selected-collection-ids '["physics-collection", "computing-collection"]' - -# Knowledge graph search -r2r search --query "quantum computing relationships" \ - --use-kg-search \ - --kg-search-type "local" \ - --entity-types '["Concept", "Technology"]' -``` - -### Basic RAG - -```bash -# Simple RAG query -r2r rag --query "Explain quantum computing" - -# Streaming RAG response -r2r rag --query "Explain quantum computing" --stream -``` - -### Advanced RAG - -```bash -# RAG with custom model and hybrid search -r2r rag --query "Explain quantum computing" \ - --rag-model "gpt-4" \ - --use-hybrid-search \ - --search-limit 20 - -# RAG with knowledge graph integration -r2r rag --query "How do quantum computers relate to cryptography?" 
\ - --use-kg-search \ - --kg-search-type "global" \ - --relationships '["ENABLES", "IMPACTS"]' \ - --stream -``` - -## Tips for Effective Retrieval - -1. **Refine Your Queries**: Be specific and clear in your search queries to get more relevant results. - -2. **Use Filters**: Narrow down results using filters when you know specific document characteristics. - -3. **Combine Search Types**: Use hybrid search and knowledge graph capabilities together for more comprehensive results. - -4. **Adjust Search Limits**: Modify the search limit based on your needs - higher limits for broad topics, lower limits for specific queries. - -5. **Stream Long Responses**: Use the `--stream` option with RAG for better user experience with longer generations. diff --git a/docs/documentation/cli/server.mdx b/docs/documentation/cli/server.mdx deleted file mode 100644 index c19184117..000000000 --- a/docs/documentation/cli/server.mdx +++ /dev/null @@ -1,160 +0,0 @@ ---- -title: 'Server' -description: 'Managing the R2R server with the CLI.' ---- - -## Server Management - -### Check Server Health - -Check the health of the server using the `health` command: - -````bash -r2r health -```` - -This command has no additional arguments. - -### Check Server Stats - -Check the server stats using the `server-stats` command: - -````bash -r2r server-stats -```` - -This command has no additional arguments. - -### Retrieve Logs - -Retrieve logs with an optional type filter using the `logs` command: - -````bash -r2r logs --offset 0 --limit 10 --run-type-filter ingestion -```` - - - - - Pagination offset. Default is None. - - - - Pagination limit. Defaults to 100. - - - - Filter for log types. - - - - -### Bring Down Docker Compose - -Bring down the Docker Compose setup and attempt to remove the network if necessary using the `docker-down` command: - -````bash -r2r docker-down --volumes --remove-orphans --project-name my-project -```` - - - - - Remove named volumes declared in the `volumes` section of the Compose file. - - - - Remove containers for services not defined in the Compose file. - - - - Which Docker Compose project to bring down. - - - - -### Generate System Report - -Generate a system report including R2R version, Docker info, and OS details using the `generate-report` command: - -````bash -r2r generate-report -```` - -This command has no additional arguments. - -### Start R2R Server - -Start the R2R server using the `serve` command: - -````bash -r2r serve --host localhost --port 8000 --docker --full --project-name my-project --config-name my-config --config-path /path/to/config --build --image my-image --image-env prod --exclude-postgres -```` - - - - - Host to run the server on. Default is None. - - - - Port to run the server on. Default is None. - - - - Run using Docker. - - - - Run the full R2R compose? This includes Hatchet and Unstructured. - - - - Project name for Docker deployment. Default is None. - - - - Name of the R2R configuration to use. Default is None. - - - - Path to a custom R2R configuration file. Default is None. - - - - Run in debug mode. Only for development. - - - - Docker image to use. - - - - Which dev environment to pull the image from? Default is "prod". - - - - Excludes creating a Postgres container in the Docker setup. - - - - -### Update R2R - -Update the R2R package to the latest version using the `update` command: - -````bash -r2r update -```` - -This command has no additional arguments. 
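After updating the package, it is good practice to bring the database schema in line with the new version before restarting the server. As a rough sketch of the workflow described in the maintenance guide, a typical post-update sequence looks like this (you can then confirm the result with the `version` command shown below):

````bash
# Typical post-update sequence, drawn from the maintenance workflow
r2r update        # upgrade the R2R Python package
r2r db upgrade    # apply any pending database migrations
````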
- -### Print R2R Version - -Print the version of R2R using the `version` command: - -````bash -r2r version -```` - -This command has no additional arguments. diff --git a/docs/documentation/configuration/ingestion/embedding.mdx b/docs/documentation/configuration/ingestion/embedding.mdx deleted file mode 100644 index e5db525ec..000000000 --- a/docs/documentation/configuration/ingestion/embedding.mdx +++ /dev/null @@ -1,233 +0,0 @@ ---- -title: 'Embedding' ---- - -## Embedding Provider - -By default, R2R uses the LiteLLM framework to communicate with various cloud embedding providers. To customize the embedding settings: - -```toml r2r.toml -[embedding] -provider = "litellm" -base_model = "openai/text-embedding-3-small" -base_dimension = 512 -batch_size = 128 -add_title_as_prefix = false -rerank_model = "None" -concurrent_request_limit = 256 -``` - -Let's break down the embedding configuration options: - -- `provider`: Choose from `ollama`, `litellm` and `openai`. R2R defaults to using the LiteLLM framework for maximum embedding provider flexibility. -- `base_model`: Specifies the embedding model to use. Format is typically "provider/model-name" (e.g., `"openai/text-embedding-3-small"`). -- `base_dimension`: Sets the dimension of the embedding vectors. Should match the output dimension of the chosen model. -- `batch_size`: Determines the number of texts to embed in a single API call. Larger values can improve throughput but may increase latency. -- `add_title_as_prefix`: When true, prepends the document title to the text before embedding, providing additional context. -- `rerank_model`: Specifies a model for reranking results. Set to "None" to disable reranking (note: not supported by LiteLLMEmbeddingProvider). -- `concurrent_request_limit`: Sets the maximum number of concurrent embedding requests to manage load and avoid rate limiting. - - Embedding providers for an R2R system cannot be configured at runtime and are instead configured server side. - - -### Supported LiteLLM Providers - -Support for any of the embedding providers listed below is provided through LiteLLM. - - - Example configuration: - ```toml example r2r.toml - provider = "litellm" - base_model = "openai/text-embedding-3-small" - base_dimension = 512 - ``` - - ```bash - export OPENAI_API_KEY=your_openai_key - # .. set other environment variables - - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - text-embedding-3-small - - text-embedding-3-large - - text-embedding-ada-002 - - For detailed usage instructions, refer to the [LiteLLM OpenAI Embedding documentation](https://docs.litellm.ai/docs/embedding/supported_embedding#openai-embedding-models). - - - - Example configuration: - ```toml example r2r.toml - provider = "litellm" - base_model = "azure/" - base_dimension = XXX - ``` - - ```bash - export AZURE_API_KEY=your_azure_api_key - export AZURE_API_BASE=your_azure_api_base - export AZURE_API_VERSION=your_azure_api_version - # .. set other environment variables - - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - text-embedding-ada-002 - - For detailed usage instructions, refer to the [LiteLLM Azure Embedding documentation](https://docs.litellm.ai/docs/embedding/supported_embedding#azure-openai-embedding-models). - - - - Anthropic does not currently offer embedding models. Consider using OpenAI or another provider for embeddings. 
- - - - Example configuration: - ```toml example r2r.toml - provider = "litellm" - base_model = "cohere/embed-english-v3.0" - base_dimension = 1_024 - ``` - - ```bash - export COHERE_API_KEY=your_cohere_api_key - # .. set other environment variables - - r2r serve --config-path=r2r.toml - ``` - - Supported models include: - - embed-english-v3.0 - - embed-english-light-v3.0 - - embed-multilingual-v3.0 - - embed-multilingual-light-v3.0 - - embed-english-v2.0 - - embed-english-light-v2.0 - - embed-multilingual-v2.0 - - For detailed usage instructions, refer to the [LiteLLM Cohere Embedding documentation](https://docs.litellm.ai/docs/embedding/supported_embedding#cohere-embedding-models). - - - - - - When running with Ollama, additional changes are recommended for the to the `r2r.toml` file. In addition to using the `ollama` provider directly, we recommend restricting the `concurrent_request_limit` in order to avoid exceeding the throughput of your Ollama server. - ```toml example r2r.toml - [embedding] - provider = "ollama" - base_model = "ollama/mxbai-embed-large" - base_dimension = 1_024 - batch_size = 32 - add_title_as_prefix = true - ``` - - - ```bash - # Ensure your Ollama server is running - # Default Ollama server address: http://localhost:11434 - # <-- OR --> - # Use `r2r --config-name=local_llm serve --docker` - # which bundles ollama with R2R in Docker by default! - - r2r serve --config-path=r2r.toml - ``` - - Then, deploy your R2R server with `r2r serve --config-path=r2r.toml `. - - - - Example configuration: - - ```toml example r2r.toml - [embedding] - provider = "litellm" - base_model = "huggingface/microsoft/codebert-base" - base_dimension = 768 - ``` - - ```python - export HUGGINGFACE_API_KEY=your_huggingface_api_key - - r2r serve --config-path=r2r.toml - ``` - LiteLLM supports all Feature-Extraction Embedding models on HuggingFace. - - For detailed usage instructions, refer to the [LiteLLM HuggingFace Embedding documentation](https://docs.litellm.ai/docs/embedding/supported_embedding#huggingface-embedding-models). - - - - Example configuration: - - ```toml example r2r.toml - provider = "litellm" - base_model = "bedrock/amazon.titan-embed-text-v1" - base_dimension = 1_024 - ``` - - ```bash - export AWS_ACCESS_KEY_ID=your_access_key - export AWS_SECRET_ACCESS_KEY=your_secret_key - export AWS_REGION_NAME=your_region_name - # .. set other environment variables - - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - amazon.titan-embed-text-v1 - - cohere.embed-english-v3 - - cohere.embed-multilingual-v3 - - For detailed usage instructions, refer to the [LiteLLM Bedrock Embedding documentation](https://docs.litellm.ai/docs/embedding/supported_embedding#bedrock-embedding). - - - - - Example configuration: - ```toml example r2r.toml - provider = "litellm" - base_model = "vertex_ai/textembedding-gecko" - base_dimension = 768 - ``` - - ```bash - export GOOGLE_APPLICATION_CREDENTIALS=path/to/your/credentials.json - export VERTEX_PROJECT=your_project_id - export VERTEX_LOCATION=your_project_location - # .. 
set other environment variables - - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - textembedding-gecko - - textembedding-gecko-multilingual - - textembedding-gecko@001 - - textembedding-gecko@003 - - text-embedding-preview-0409 - - text-multilingual-embedding-preview-0409 - - For detailed usage instructions, refer to the [LiteLLM Vertex AI Embedding documentation](https://docs.litellm.ai/docs/embedding/supported_embedding#vertex-ai-embedding-models). - - - - Example Configuration - ```toml example r2r.toml - provider = "litellm" - base_model = "voyage/voyage-01" - base_dimension = 1_024 - ``` - - ```bash - export VOYAGE_API_KEY=your_voyage_api_key - # .. set other environment variables - - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - voyage-01 - - voyage-lite-01 - - voyage-lite-01-instruct - - For detailed usage instructions, refer to the [LiteLLM Voyage AI Embedding documentation](https://docs.litellm.ai/docs/embedding/supported_embedding#voyage-ai-embedding-models). - - diff --git a/docs/documentation/configuration/ingestion/overview.mdx b/docs/documentation/configuration/ingestion/overview.mdx deleted file mode 100644 index 3830f3b29..000000000 --- a/docs/documentation/configuration/ingestion/overview.mdx +++ /dev/null @@ -1,207 +0,0 @@ ---- -title: 'Overview' -description: 'Configure your R2R ingestion pipeline' ---- -## Introduction -R2R's ingestion pipeline efficiently processes various document formats, transforming them into searchable content. It seamlessly integrates with vector databases and knowledge graphs for optimal retrieval and analysis. - -### Implementation Options -R2R offers two main implementations for ingestion: -- **Light**: Uses R2R's **built-in** ingestion logic, supporting a wide range of file types including TXT, JSON, HTML, PDF, DOCX, PPTX, XLSX, CSV, Markdown, images, audio, and video. For high-quality PDF parsing, it is recommended to use the zerox parser. -- **Full**: Leverages **Unstructured's** open-source [ingestion platform](https://docs.unstructured.io/open-source/introduction/overview) to handle supported file types. This is the default for the 'full' installation and provides more advanced parsing capabilities. - -## Core Concepts - -### Document Processing Pipeline -Inside R2R, ingestion refers to the complete pipeline for processing input data: -- Parsing files into text -- Chunking text into semantic units -- Generating embeddings -- Storing data for retrieval - -Ingested files are stored with an associated document identifier and user identifier to enable comprehensive management. - -### Multimodal Support -R2R has recently expanded its capabilities to include multimodal foundation models. In addition to using such models by default for images, R2R can now use them on PDFs by configuring the parser override: - -```json -"ingestion_config": { - "parser_overrides": { - "pdf": "zerox" - } -} -``` - -## Configuration - -### Key Configuration Areas -Many settings are managed by the `r2r.toml` configuration file: - -```toml -[database] -provider = "postgres" - -[ingestion] -provider = "r2r" -chunking_strategy = "recursive" -chunk_size = 1_024 -chunk_overlap = 512 -excluded_parsers = ["mp4"] - -[embedding] -provider = "litellm" -base_model = "openai/text-embedding-3-small" -base_dimension = 512 -batch_size = 128 -add_title_as_prefix = false -rerank_model = "None" -concurrent_request_limit = 256 -``` - -### Configuration Impact -These settings directly influence how R2R performs ingestion: - -1. 
**Database Configuration** - - Configures Postgres database for semantic search and document management - - Used during retrieval to find relevant document chunks via vector similarity - -2. **Ingestion Settings** - - Determines file type processing and text conversion methods - - Controls text chunking protocols and granularity - - Affects information storage and retrieval precision - -3. **Embedding Configuration** - - Defines model and parameters for text-to-vector conversion - - Used during retrieval to embed user queries - - Enables vector comparison against stored document embeddings - -## Document Management - -### Document Ingestion -The system provides several methods for ingesting documents: - -1. **File Ingestion** -```python -file_paths = ['path/to/file1.txt', 'path/to/file2.txt'] -metadatas = [{'key1': 'value1'}, {'key2': 'value2'}] - -ingest_response = client.ingest_files( - file_paths=file_paths, - metadatas=metadatas, - ingestion_config={ - "provider": "unstructured_local", - "strategy": "auto", - "chunking_strategy": "by_title", - "new_after_n_chars": 256, - "max_characters": 512 - } -) -``` - -2. **Direct Chunk Ingestion** -```python -chunks = [ - { - "text": "Sample text chunk 1...", - }, - { - "text": "Sample text chunk 2...", - } -] - -ingest_response = client.ingest_chunks( - chunks=chunks, - metadata={"title": "Sample", "source": "example"} -) -``` - -### Document Updates -Update existing documents while maintaining version history: - -```python -update_response = client.update_files( - file_paths=file_paths, - document_ids=document_ids, - metadatas=[{"status": "reviewed"}] -) -``` - -### Vector Index Management - -#### Creating Indices -Vector indices improve search performance for large collections: - -```python -create_response = client.create_vector_index( - table_name="vectors", - index_method="hnsw", - index_measure="cosine_distance", - index_arguments={"m": 16, "ef_construction": 64}, - concurrently=True -) -``` - -Important considerations for index creation: -- Resource intensive process -- Requires pre-warming for optimal performance -- Parameters affect build time and search quality -- Monitor system resources during creation - -#### Managing Indices -List and delete indices as needed: - -```python -# List indices -indices = client.list_vector_indices(table_name="vectors") - -# Delete index -delete_response = client.delete_vector_index( - index_name="index_name", - table_name="vectors", - concurrently=True -) -``` - -## Troubleshooting - -### Common Issues and Solutions - -1. **Ingestion Failures** - - Verify file permissions and paths - - Check supported file formats - - Ensure metadata matches file_paths - - Monitor memory usage - -2. **Chunking Issues** - - Large chunks may impact retrieval quality - - Small chunks may lose context - - Adjust overlap for better context preservation - -3. **Vector Index Performance** - - Monitor creation time - - Check memory usage - - Verify warm-up queries - - Consider rebuilding if quality degrades - -## Pipeline Architecture -The ingestion pipeline consists of several key components: - -```mermaid -graph TD - A[Input Documents] --> B[Parsing Pipe] - B --> C[Embedding Pipeline] - B --> D[Knowledge Graph Pipeline] - C --> E[Vector Database] - D --> F[Knowledge Graph Database] -``` - -This modular design allows for customization and extension of individual components while maintaining robust document processing capabilities. 
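As a concrete illustration of how these pieces fit together, the sketch below passes the multimodal `zerox` PDF parser override described earlier directly to `ingest_files` at request time. The file path and metadata values are placeholders, and the import/client construction is assumed to match the SDK usage shown in the examples above; treat this as a minimal illustration rather than a canonical recipe.

```python
from r2r import R2RClient  # assumed import, matching the SDK usage shown earlier

client = R2RClient("http://localhost:7272")

# Route PDFs through the multimodal `zerox` parser for this request only;
# all other ingestion settings fall back to the server-side r2r.toml values.
ingest_response = client.ingest_files(
    file_paths=["path/to/report.pdf"],           # placeholder file path
    metadatas=[{"source": "quarterly-report"}],  # placeholder metadata
    ingestion_config={
        "parser_overrides": {"pdf": "zerox"}
    },
)
print(ingest_response)
```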
- -## Next Steps - -For more detailed information on configuring specific components of the ingestion pipeline, please refer to the following pages: - -- [Parsing & Chunking Configuration](/documentation/configuration/ingestion/parsing_and_chunking) -- [Embedding Configuration](/documentation/configuration/ingestion/embedding) -- [Knowledge Graph Configuration](/documentation/configuration/knowledge-graph/overview) -- [Retrieval Configuration](/documentation/configuration/retrieval/overview) diff --git a/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx b/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx deleted file mode 100644 index 425605304..000000000 --- a/docs/documentation/configuration/ingestion/parsing_and_chunking.mdx +++ /dev/null @@ -1,266 +0,0 @@ -## Parsing & Chunking - -R2R supports different parsing and chunking providers to extract text from various document formats and break it down into manageable pieces for efficient processing and retrieval. - -To configure the parsing and chunking settings, update the `[ingestion]` section in your `r2r.toml` file: - -```toml -[ingestion] -provider = "r2r" # or "unstructured_local" or "unstructured_api" -# ... provider-specific settings ... -``` - -### Runtime Configuration - -In addition to configuring parsing and chunking settings in the `r2r.toml` file, you can also customize these settings at runtime when ingesting files using the Python SDK. This allows for more flexibility and control over the ingestion process on a per-file or per-request basis. - -Some of the configurable options include: - -- Chunking strategy (e.g., "recursive", "by_title", "basic") -- Chunk size and overlap -- Excluded parsers -- Provider-specific settings (e.g., max characters, overlap, languages) - - -An exhaustive list of runtime ingestion inputs to the `ingest-files` endpoint is shown below: - - - A list of file paths or directory paths to ingest. If a directory path is provided, all files within the directory and its subdirectories will be ingested. - - - - An optional list of metadata dictionaries corresponding to each file. If provided, the length should match the number of files being ingested. - - - - An optional list of document IDs to assign to the ingested files. If provided, the length should match the number of files being ingested. - - - - An optional list of version strings for the ingested files. If provided, the length should match the number of files being ingested. - - - - The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. Learn more about [configuration here](/documentation/configuration/ingestion/parsing_and_chunking). - - - Which R2R ingestion provider to use. Options are "r2r". - - - Only `recursive` is currently supported. - - - The target size for output chunks. - - - The target overlap fraction for output chunks - - - Which parsers to exclude from inside R2R. - - - - - - Which unstructured ingestion provider to use. Options are "unstructured_local", or "unstructured_api". - - - - Sets a maximum size on output chunks. - - - - Combine chunks smaller than this number of characters. - - - - Maximum number of characters per chunk. - - - - Whether to include coordinates in the output. - - - - Encoding to use for text files. - - - - Types of image blocks to extract. - - - - Content type for uncompressed gzip files. - - - - Name of the high-resolution model to use. - - - - Whether to include original elements in the output. 
- - - - Whether to include page breaks in the output. - - - - List of languages to consider for text processing. - - - - Whether to allow sections to span multiple pages. - - - - Start a new chunk after this many characters. - - - - Languages to use for OCR. - - - - Format of the output. - - - - Number of characters to overlap between chunks. - - - - Whether to overlap all chunks. - - - - Whether to infer table structure in PDFs. - - - - Threshold for considering chunks similar. - - - - Types of tables to skip inferring. - - - - Concurrency level for splitting PDFs. - - - - Whether to split PDFs by page. - - - - Page number to start processing from. - - - - Strategy for processing. Options are "auto", "fast", or "hi_res". - - - - Strategy for chunking. Options are "by_title" or "basic". - - - - Whether to generate unique IDs for elements. - - - - Whether to keep XML tags in the output. - - - - - -For a comprehensive list of available runtime configuration options and examples of how to use them, refer to the [Python SDK Ingestion Documentation](/documentation/python-sdk/ingestion). - - -### Supported Providers - -R2R offers two main parsing and chunking providers: - -1. **R2R (default for 'light' installation)**: - - Uses R2R's built-in parsing and chunking logic. - - Supports a wide range of file types, including TXT, JSON, HTML, PDF, DOCX, PPTX, XLSX, CSV, Markdown, images, audio, and video. - - Configuration options: - ```toml - [ingestion] - provider = "r2r" - chunking_strategy = "recursive" - chunk_size = 1_024 - chunk_overlap = 512 - excluded_parsers = ["mp4"] - ``` - - `chunking_strategy`: The chunking method ("recursive"). - - `chunk_size`: The target size for each chunk. - - `chunk_overlap`: The number of characters to overlap between chunks. - - `excluded_parsers`: List of parsers to exclude (e.g., ["mp4"]). - -2. **Unstructured (default for 'full' installation)**: - - Leverages Unstructured's open-source ingestion platform. - - Provides more advanced parsing capabilities. - - Configuration options: - ```toml - [ingestion] - provider = "unstructured_local" - strategy = "auto" - chunking_strategy = "by_title" - new_after_n_chars = 512 - max_characters = 1_024 - combine_under_n_chars = 128 - overlap = 20 - ``` - - `strategy`: The overall chunking strategy ("auto", "fast", or "hi_res"). - - `chunking_strategy`: The specific chunking method ("by_title" or "basic"). - - `new_after_n_chars`: Soft maximum size for a chunk. - - `max_characters`: Hard maximum size for a chunk. - - `combine_under_n_chars`: Minimum size for combining small sections. - - `overlap`: Number of characters to overlap between chunks. - -### Supported File Types - -Both R2R and Unstructured providers support parsing a wide range of file types, including: - -- TXT, JSON, HTML, PDF, DOCX, PPTX, XLSX, CSV, Markdown, images (BMP, GIF, HEIC, JPEG, JPG, PNG, SVG, TIFF), audio (MP3), video (MP4), and more. - -Refer to the [Unstructured documentation](https://docs.unstructured.io/welcome) for more details on their ingestion capabilities and limitations. - -### Configuring Parsing & Chunking - -To configure parsing and chunking settings, update the `[ingestion]` section in your `r2r.toml` file with the desired provider and its specific settings. 
- -For example, to use the R2R provider with custom chunk size and overlap: - -```toml -[ingestion] -provider = "r2r" -chunking_strategy = "recursive" -chunk_size = 2_048 -chunk_overlap = 256 -excluded_parsers = ["mp4"] -``` - -Or, to use the Unstructured provider with a specific chunking strategy and character limits: - -```toml -[ingestion] -provider = "unstructured_local" -strategy = "hi_res" -chunking_strategy = "basic" -new_after_n_chars = 1_000 -max_characters = 2_000 -combine_under_n_chars = 256 -overlap = 50 -``` - -Adjust the settings based on your specific requirements and the characteristics of your input documents. - -### Next Steps - -- Learn more about [Embedding Configuration](/documentation/configuration/ingestion/embedding). -- Explore [Knowledge Graph Configuration](/documentation/configuration/knowledge-graph/overview). -- Check out [Retrieval Configuration](/documentation/configuration/retrieval/overview). diff --git a/docs/documentation/configuration/introduction.mdx b/docs/documentation/configuration/introduction.mdx deleted file mode 100644 index 024944a11..000000000 --- a/docs/documentation/configuration/introduction.mdx +++ /dev/null @@ -1,79 +0,0 @@ ---- -title: 'Introduction' -description: 'Learn how to configure your R2R deployment' -icon: 'gear' ---- - -## Introduction - -R2R offers a flexible configuration system that allows you to customize your Retrieval-Augmented Generation (RAG) applications. This guide introduces the key concepts and methods for configuring R2R. - -## Configuration Levels - -R2R supports two main levels of configuration: - -1. **Server Configuration**: Define default server-side settings. -2. **Runtime Configuration**: Dynamically override settings when making API calls. - -## Server Configuration - -The default settings for a `light` R2R installation are specified in the [`r2r.toml`](https://github.com/SciPhi-AI/R2R/blob/main/r2r.toml) file. - -When doing a `full` installation the R2R CLI uses the [`full.toml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/configs/full.toml) to override some of the default light default settings with those of the added providers. - -To create your own custom configuration: - -1. Create a new file named `my_r2r.toml` in your project directory. -2. Add only the settings you wish to customize. For example: - -```toml my_r2r.toml -[embedding] -provider = "litellm" -base_model = "text-embedding-3-small" -base_dimension = 1536 - -[completion] - [completion.generation_config] - model = "anthropic/claude-3-opus-20240229" -``` - -3. Launch R2R with the CLI using your custom configuration: - -```bash -r2r serve --config-path=my_r2r.toml -``` - -R2R will use your specified settings, falling back to defaults for any unspecified options. - -## Runtime Configuration - -When calling endpoints, you can override server configurations on-the-fly. This allows for dynamic control over search settings, model selection, prompt customization, and more. 
- -For example, using the Python SDK: - -```python -client = R2RClient("http://localhost:7272") - -response = client.rag( - "Who was Aristotle?", - rag_generation_config={ - "model": "anthropic/claude-3-haiku-20240307", - "temperature": 0.7 - }, - vector_search_settings={ - "search_limit": 100, - "use_hybrid_search": True - } -) -``` - -## Next Steps - -For more detailed information on configuring specific components of R2R, please refer to the following pages: - -- [Postgres Configuration](/documentation/configuration/postgres) -- [LLM Configuration](/documentation/configuration/llm) -- [RAG Configuration](/documentation/configuration/rag) -- [Ingestion Configuration](/documentation/configuration/ingestion/overview) -- [Knowledge Graph Configuration](/documentation/configuration/knowledge-graph/overview) -- [Retrieval Configuration](/documentation/configuration/retrieval/overview) diff --git a/docs/documentation/configuration/knowledge-graph/enrichment.mdx b/docs/documentation/configuration/knowledge-graph/enrichment.mdx deleted file mode 100644 index 291e339f3..000000000 --- a/docs/documentation/configuration/knowledge-graph/enrichment.mdx +++ /dev/null @@ -1,33 +0,0 @@ ---- -title: 'KG Enrichment' -description: 'Configuration for Restructuring data after ingestion using Knowledge Graphs' ---- - -It is often effective to restructure data after ingestion to improve retrieval performance and accuracy. R2R supports knowledge graphs for data restructuring. You can find out more about creating knowledge graphs in the [Knowledge Graphs Guide](/cookbooks/graphrag). - -You can configure knowledge graph enrichment in the R2R configuration file. To do this, just set the `kg.kg_enrichment_settings` section in the configuration file. Following is the sample format from the example configuration file `r2r.toml`. - -```toml -[database] -provider = "postgres" -batch_size = 256 - - [database.kg_creation_settings] - kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" - entity_types = [] # if empty, all entities are extracted - relation_types = [] # if empty, all relations are extracted - fragment_merge_count = 4 # number of fragments to merge into a single extraction - max_knowledge_triples = 100 # max number of triples to extract for each document chunk - generation_config = { model = "openai/gpt-4o-mini" } # and other generation params - - [database.kg_enrichment_settings] - max_description_input_length = 65536 # increase if you want more comprehensive descriptions - max_summary_input_length = 65536 - generation_config = { model = "openai/gpt-4o-mini" } # and other generation params - leiden_params = {} # more params in graspologic/partition/leiden.py - - [database.kg_search_settings] - generation_config = { model = "openai/gpt-4o-mini" } -``` - -Next you can do GraphRAG with the knowledge graph. Find out more about GraphRAG in the [GraphRAG Guide](/cookbooks/graphrag). diff --git a/docs/documentation/configuration/knowledge-graph/overview.mdx b/docs/documentation/configuration/knowledge-graph/overview.mdx deleted file mode 100644 index 0aafe3175..000000000 --- a/docs/documentation/configuration/knowledge-graph/overview.mdx +++ /dev/null @@ -1,79 +0,0 @@ ---- -title: 'Overview' -description: 'Configure your R2R knowledge graph provider.' ---- -## Knowledge Graph Provider - -R2R supports knowledge graph functionality to enhance document understanding and retrieval. 
By default, R2R creates the graph by clustering with `graspologic` and saving the output triples and relationships into Postgres. We are actively working to integrate with [Memgraph](https://memgraph.com/docs). You can find out more about creating knowledge graphs in the [GraphRAG Cookbook](/cookbooks/graphrag). - - -To configure the knowledge graph settings for your project, edit the `kg` section in your `r2r.toml` file: - -```toml r2r.toml -[database] -provider = "postgres" -batch_size = 256 -kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" - - [database.kg_creation_settings] - entity_types = [] # if empty, all entities are extracted - relation_types = [] # if empty, all relations are extracted - generation_config = { model = "openai/gpt-4o-mini" } - max_knowledge_triples = 100 # max number of triples to extract for each document chunk - fragment_merge_count = 4 # number of fragments to merge into a single extraction - - [database.kg_enrichment_settings] - max_description_input_length = 65536 # increase if you want more comprehensive descriptions - max_summary_input_length = 65536 - generation_config = { model = "openai/gpt-4o-mini" } # and other generation params below - leiden_params = {} - - [database.kg_search_settings] - generation_config = { model = "openai/gpt-4o-mini" } -``` - - -Let's break down the knowledge graph configuration options: - -- `provider`: Specifies the knowledge graph provider. Currently, "postgres" is supported. -- `batch_size`: Determines the number of entities or relationships to process in a single batch during import operations. -- `kg_triples_extraction_prompt`: Specifies the prompt template to use for extracting knowledge graph information from text. -- `kg_creation_settings`: Configuration for the model used in knowledge graph creation. - - `max_knowledge_triples`: The maximum number of knowledge triples to extract for each document chunk. - - `fragment_merge_count`: The number of fragments to merge into a single extraction. - - `generation_config`: Configuration for the model used in knowledge graph creation. -- `kg_enrichment_settings`: Similar configuration for the model used in knowledge graph enrichment. - - `generation_config`: Configuration for the model used in knowledge graph enrichment. - - `leiden_params`: Parameters for the Leiden algorithm. -- `kg_search_settings`: Similar configuration for the model used in knowledge graph search operations. - - - - -Setting configuration values in the `r2r.toml` will override environment variables by default. - - - -### Knowledge Graph Operations - -1. **Entity Management**: Add, update, and retrieve entities in the knowledge graph. -2. **Relationship Management**: Create and query relationships between entities. -3. **Batch Import**: Efficiently import large amounts of data using batched operations. -4. **Vector Search**: Perform similarity searches on entity embeddings. -5. **Community Detection**: Identify and manage communities within the graph. - -### Customization - -You can customize the knowledge graph extraction and search processes by modifying the `kg_triples_extraction_prompt` and adjusting the model configurations in `kg_extraction_settings` and `kg_search_settings`. Moreover, you can customize the LLM models used in various parts of the knowledge graph creation process. All of these options can be selected at runtime, with the only exception being the specified database provider. 
For more details, refer to the knowledge graph settings in the [search API](/api-reference/endpoint/search). - -By leveraging the knowledge graph capabilities, you can enhance R2R's understanding of document relationships and improve the quality of search and retrieval operations. - - - -## Next Steps - -For more detailed information on configuring specific components of the ingestion pipeline, please refer to the following pages: - -- [Ingestion Configuration](/documentation/configuration/ingestion/overview) -- [Enrichment Configuration](/documentation/configuration/knowledge-graph/enrichment) -- [Retrieval Configuration](/documentation/configuration/retrieval/overview) diff --git a/docs/documentation/configuration/llm.mdx b/docs/documentation/configuration/llm.mdx deleted file mode 100644 index 35e2e2e1c..000000000 --- a/docs/documentation/configuration/llm.mdx +++ /dev/null @@ -1,285 +0,0 @@ ---- -title: 'LLMs' -description: 'Learn how to configure LLMs in your R2R deployment' -icon: 'language' ---- - -R2R uses language models to generate responses based on retrieved context. You can configure R2R's server-side LLM generation settings with the [`r2r.toml`](https://github.com/SciPhi-AI/R2R/blob/main/py/r2r.toml): - -```toml r2r.toml -[completion] -provider = "litellm" -concurrent_request_limit = 16 - - [completion.generation_config] - model = "openai/gpt-4o" - temperature = 0.1 - top_p = 1 - max_tokens_to_sample = 1_024 - stream = false - add_generation_kwargs = {} -``` - -Key generation configuration options: - -- `provider`: The LLM provider (defaults to "LiteLLM" for maximum flexibility). -- `concurrent_request_limit`: Maximum number of concurrent LLM requests. -- `model`: The language model to use for generation. -- `temperature`: Controls the randomness of the output (0.0 to 1.0). -- `top_p`: Nucleus sampling parameter (0.0 to 1.0). -- `max_tokens_to_sample`: Maximum number of tokens to generate. -- `stream`: Enable/disable streaming of generated text. -- `api_base`: The base URL for remote communication, e.g. `https://api.openai.com/v1` - -#### Serving select LLM providers - - - - - ```python - export OPENAI_API_KEY=your_openai_key - # .. set other environment variables - - # Optional - Update default model - # Set '"model": "openai/gpt-4o-mini"' in `r2r.toml` - # then call `r2r serve --config-path=r2r.toml` - r2r serve - ``` - Supported models include: - - openai/gpt-4o - - openai/gpt-4-turbo - - openai/gpt-4 - - openai/gpt-4o-mini - - For a complete list of supported OpenAI models and detailed usage instructions, please refer to the [LiteLLM OpenAI documentation](https://docs.litellm.ai/docs/providers/openai). - - - ```python - export AZURE_API_KEY=your_azure_api_key - export AZURE_API_BASE=your_azure_api_base - export AZURE_API_VERSION=your_azure_api_version - # .. set other environment variables - - # Optional - Update default model - # Set '"model": "azure/"' in `r2r.toml` - r2r serve --config-path=my_r2r.toml - ``` - Supported models include: - - azure/gpt-4o - - azure/gpt-4-turbo - - azure/gpt-4 - - azure/gpt-4o-mini - - azure/gpt-4o-mini - For a complete list of supported Azure models and detailed usage instructions, please refer to the [LiteLLM Azure documentation](https://docs.litellm.ai/docs/providers/azure). - - - - - - ```python - export ANTHROPIC_API_KEY=your_anthropic_key - # export ANTHROPIC_API_BASE=your_anthropic_base_url - # .. 
set other environment variables - - # Optional - Update default model - # Set '"model": "anthropic/claude-3-opus-20240229"' in `r2r.toml` - r2r serve --config-path=my_r2r.toml - ``` - Supported models include: - - - anthropic/claude-3-5-sonnet-20240620 - - anthropic/claude-3-opus-20240229 - - anthropic/claude-3-sonnet-20240229 - - anthropic/claude-3-haiku-20240307 - - anthropic/claude-2.1 - - For a complete list of supported Anthropic models and detailed usage instructions, please refer to the [LiteLLM Anthropic documentation](https://docs.litellm.ai/docs/providers/anthropic). - - - - - ```python - export GOOGLE_APPLICATION_CREDENTIALS=path/to/your/credentials.json - export VERTEX_PROJECT=your_project_id - export VERTEX_LOCATION=your_project_location - # .. set other environment variables - - # Optional - Update default model - # Set '"model": "vertex_ai/gemini-pro"' in `r2r.toml` - r2r serve --config-path=my_r2r.toml - ``` - - Supported models include: - - vertex_ai/gemini-pro - - vertex_ai/gemini-pro-vision - - vertex_ai/claude-3-opus@20240229 - - vertex_ai/claude-3-sonnet@20240229 - - vertex_ai/mistral-large@2407 - - For a complete list of supported Vertex AI models and detailed usage instructions, please refer to the [LiteLLM Vertex AI documentation](https://docs.litellm.ai/docs/providers/vertex). - - Vertex AI requires additional setup for authentication and project configuration. Refer to the documentation for detailed instructions on setting up service accounts and configuring your environment. - - - - ```python - export AWS_ACCESS_KEY_ID=your_access_key - export AWS_SECRET_ACCESS_KEY=your_secret_key - export AWS_REGION_NAME=your_region_name - # .. set other environment variables - - # Optional - Update default model - # Set '"model": "bedrock/anthropic.claude-v2"' in `r2r.toml` - r2r serve --config-path=my_r2r.toml - ``` - - Supported models include: - - bedrock/anthropic.claude-3-sonnet-20240229-v1:0 - - bedrock/anthropic.claude-v2 - - bedrock/anthropic.claude-instant-v1 - - bedrock/amazon.titan-text-express-v1 - - bedrock/meta.llama2-70b-chat-v1 - - bedrock/mistral.mixtral-8x7b-instruct-v0:1 - - For a complete list of supported AWS Bedrock models and detailed usage instructions, please refer to the [LiteLLM AWS Bedrock documentation](https://docs.litellm.ai/docs/providers/bedrock). - - AWS Bedrock requires boto3 to be installed (`pip install boto3>=1.28.57`). Make sure to set up your AWS credentials properly before using Bedrock models. - - - - ```python - export GROQ_API_KEY=your_groq_api_key - # .. set other environment variables - - # Optional - Update default model - # Set '"model": "groq/llama3-8b-8192"' in `r2r.toml` - r2r serve --config-path=my_r2r.toml - ``` - - Supported models include: - - llama-3.1-8b-instant - - llama-3.1-70b-versatile - - llama-3.1-405b-reasoning - - llama3-8b-8192 - - llama3-70b-8192 - - mixtral-8x7b-32768 - - gemma-7b-it - - For a complete list of supported Groq models and detailed usage instructions, please refer to the [LiteLLM Groq documentation](https://docs.litellm.ai/docs/providers/groq). - - Note: Groq supports ALL models available on their platform. Use the prefix `groq/` when specifying the model name. 
- - Additional features: - - Supports streaming responses - - Function/Tool calling available for compatible models - - Speech-to-Text capabilities with Whisper model - - - - ```python - # Ensure your Ollama server is running - # Default Ollama server address: http://localhost:11434 - # <-- OR --> - # Use `r2r --config-name=local_llm serve --docker` - # which bundles ollama with R2R in Docker by default! - - # Optional - Update default model - # Copy `r2r/examples/configs/local_llm.toml` into `my_r2r_local_llm.toml` - # Set '"model": "ollama/llama3.1"' in `my_r2r_local_llm.toml` - # then call `r2r --config-path=my_r2r_local_llm.toml` - ``` - - Supported models include: - - llama2 - - mistral - - mistral-7B-Instruct-v0.1 - - mixtral-8x7B-Instruct-v0.1 - - codellama - - llava (vision model) - - For a complete list of supported Ollama models and detailed usage instructions, please refer to the [LiteLLM Ollama documentation](https://docs.litellm.ai/docs/providers/ollama). - - Ollama supports local deployment of various open-source models. Ensure you have the desired model pulled and running on your Ollama server. [See here](/documentation/local-rag) for more detailed instructions on local RAG setup. - - - - - ```python - export COHERE_API_KEY=your_cohere_api_key - # .. set other environment variables - - # Optional - Update default model - # Set '"model": "command-r"' in `r2r.toml` - r2r serve --config-path=my_r2r.toml - ``` - - Supported models include: - - command-r - - command-light - - command-r-plus - - command-medium - - For a complete list of supported Cohere models and detailed usage instructions, please refer to the [LiteLLM Cohere documentation](https://docs.litellm.ai/docs/providers/cohere). - - - - - ```python - export ANYSCALE_API_KEY=your_anyscale_api_key - # .. set other environment variables - - # Optional - Update default model - # Set '"model": "anyscale/mistralai/Mistral-7B-Instruct-v0.1"' in `r2r.toml` - r2r serve --config-path=my_r2r.toml - ``` - - Supported models include: - - anyscale/meta-llama/Llama-2-7b-chat-hf - - anyscale/meta-llama/Llama-2-13b-chat-hf - - anyscale/meta-llama/Llama-2-70b-chat-hf - - anyscale/mistralai/Mistral-7B-Instruct-v0.1 - - anyscale/codellama/CodeLlama-34b-Instruct-hf - - For a complete list of supported Anyscale models and detailed usage instructions, please refer to the [Anyscale Endpoints documentation](https://app.endpoints.anyscale.com/). - - - Anyscale supports a wide range of models, including Llama 2, Mistral, and CodeLlama variants. Check the Anyscale Endpoints documentation for the most up-to-date list of available models. - - - - - - - -### Runtime Configuration of LLM Provider - -R2R supports runtime configuration of the LLM provider, allowing you to dynamically change the model or provider for each request. This flexibility enables you to use different models or providers based on specific requirements or use cases. - -### Combining Search and Generation - -When performing a RAG query, you can dynamically set the LLM generation settings: - -```python -response = client.rag( - "What are the latest advancements in quantum computing?", - rag_generation_config={ - "stream": False, - "model": "openai/gpt-4o-mini", - "temperature": 0.7, - "max_tokens": 150 - } -) -``` - -For more detailed information on configuring other search and RAG settings, please refer to the [RAG Configuration documentation](/documentation/configuration/rag). 
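When `stream` is set to `true`, the generated text is returned incrementally rather than as a single payload. The sketch below shows one way to consume a streamed RAG response; it assumes the Python SDK returns an iterable of text chunks when streaming is enabled, so adjust the loop to match your SDK version.

```python
from r2r import R2RClient

client = R2RClient()

# Request a streamed completion by enabling `stream` in the generation config.
streaming_response = client.rag(
    "What are the latest advancements in quantum computing?",
    rag_generation_config={
        "stream": True,
        "model": "openai/gpt-4o-mini",
        "temperature": 0.7,
    },
)

# Print chunks as they arrive (assumes the streamed response is iterable).
for chunk in streaming_response:
    print(chunk, end="", flush=True)
```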
- - -## Next Steps - -For more detailed information on configuring specific components of R2R, please refer to the following pages: - -- [Postgres Configuration](/documentation/configuration/postgres) -- [RAG Configuration](/documentation/configuration/rag) -- [Ingestion Configuration](/documentation/configuration/ingestion/overview) -- [Knowledge Graph Configuration](/documentation/configuration/knowledge-graph/overview) -- [Retrieval Configuration](/documentation/configuration/retrieval/overview) diff --git a/docs/documentation/configuration/postgres.mdx b/docs/documentation/configuration/postgres.mdx deleted file mode 100644 index 559b988ff..000000000 --- a/docs/documentation/configuration/postgres.mdx +++ /dev/null @@ -1,124 +0,0 @@ ---- -title: 'Postgres' -description: "Configure your R2R Postgres database" -icon: "database" ---- - -## Postgres Database - -R2R uses PostgreSQL as the sole provider for relational and vector search queries. This means that Postgres is involved in handling authentication, document management, and search across R2R. For robust search capabilities, R2R leverages the `pgvector` extension and `ts_rank` to implement [customizable hybrid search](/cookbooks/hybrid-search). - - - R2R chose Postgres as its core technology for several reasons: - - - **Versatility**: Postgres is a robust, advanced database that can handle both relational data and vector embeddings. - - **Simplicity**: By using Postgres for both traditional data and vector search, R2R eliminates the need for complex syncing between separate databases. - - **Familiarity**: Many developers are already comfortable with Postgres, making it easier to integrate R2R into existing workflows. - - **Extensibility**: Postgres's rich ecosystem of extensions allows R2R to leverage advanced features and optimizations. - - Read more about [Postgres here](https://www.postgresql.org/). - - -## Postgres Configuration - -To customize the database settings, you can modify the `database` section in your `r2r.toml` file and set corresponding environment variables or provide the settings directly in the configuration file. - -1. Edit the `database` section in your `r2r.toml` file: - -```toml r2r.toml -[database] -provider = "postgres" # currently only `postgres` is supported - -# optional parameters which are typically set in the environment instead: -user = "your_postgres_user" -password = "your_postgres_password" -host = "your_postgres_host" -port = "your_postgres_port" -db_name = "your_database_name" -your_project_name = "your_project_name" -``` - -2. Alternatively, you can set the following environment variables: - -```bash -export R2R_POSTGRES_USER=your_postgres_user -export R2R_POSTGRES_PASSWORD=your_postgres_password -export R2R_POSTGRES_HOST=your_postgres_host -export R2R_POSTGRES_PORT=your_postgres_port -export R2R_POSTGRES_DBNAME=your_database_name -export R2R_PROJECT_NAME=your_project_name -``` - -## Advanced Postgres Features in R2R - -R2R leverages several advanced PostgreSQL features to provide powerful search and retrieval capabilities: - -### pgvector Extension - -R2R uses the `pgvector` extension to enable efficient vector similarity search. This is crucial for semantic search operations. The `collection.py` file defines a custom `Vector` type that interfaces with `pgvector`: - -```python -class Vector(UserDefinedType): - # ... 
(implementation details) - - class comparator_factory(UserDefinedType.Comparator): - def l2_distance(self, other): - return self.op("<->", return_type=Float)(other) - - def max_inner_product(self, other): - return self.op("<#>", return_type=Float)(other) - - def cosine_distance(self, other): - return self.op("<=>", return_type=Float)(other) -``` - -This allows R2R to perform vector similarity searches using different distance measures. - -### Hybrid Search - -R2R implements a sophisticated hybrid search that combines full-text search and vector similarity search. This approach provides more accurate and contextually relevant results. Key components of the hybrid search include: - -1. **Full-Text Search**: Utilizes PostgreSQL's built-in full-text search capabilities with `ts_rank` and `websearch_to_tsquery`. -2. **Semantic Search**: Performs vector similarity search using `pgvector`. -3. **Reciprocal Rank Fusion (RRF)**: Merges results from full-text and semantic searches. - -The `collection.py` file includes methods for building complex SQL queries that implement this hybrid search approach. - -### GIN Indexing - -R2R uses GIN (Generalized Inverted Index) indexing to optimize full-text searches: - -```python -Index(f"idx_{name}_fts", "fts", postgresql_using="gin"), -``` - -This indexing strategy allows for efficient full-text search and trigram similarity matching. - -### JSON Support - -R2R leverages PostgreSQL's JSONB type for flexible metadata storage: - -```python -Column( - "metadata", - postgresql.JSONB, - server_default=text("'{}'::jsonb"), - nullable=False, -) -``` - -This allows for efficient storage and querying of structured metadata alongside vector embeddings. - -## Performance Considerations - -When setting up PostgreSQL for R2R, consider the following performance optimizations: - -1. **Indexing**: Ensure proper indexing for both full-text and vector searches. R2R automatically creates necessary indexes, but you may need to optimize them based on your specific usage patterns. - -2. **Hardware**: For large-scale deployments, consider using dedicated PostgreSQL instances with sufficient CPU and RAM to handle vector operations efficiently. - -3. **Vacuuming**: Regular vacuuming helps maintain database performance, especially for tables with frequent updates or deletions. - -4. **Partitioning**: For very large datasets, consider table partitioning to improve query performance. - -By leveraging these advanced PostgreSQL features and optimizations, R2R provides a powerful and flexible foundation for building sophisticated retrieval and search systems. diff --git a/docs/documentation/configuration/prompts.mdx b/docs/documentation/configuration/prompts.mdx deleted file mode 100644 index e9db1870a..000000000 --- a/docs/documentation/configuration/prompts.mdx +++ /dev/null @@ -1,207 +0,0 @@ ---- -title: 'Prompts' -description: 'Learn how to configure and manage prompts in your R2R deployment' -icon: 'message-bot' ---- - -## Prompt Management in R2R - -R2R provides a flexible system for managing prompts, allowing you to create, update, retrieve, and delete prompts dynamically. This system is crucial for customizing the behavior of language models and ensuring consistent interactions across your application. - -## Default Prompts - -R2R comes with a set of default prompts that are loaded from YAML files located in the [`py/core/providers/prompts/defaults`](https://github.com/SciPhi-AI/R2R/tree/main/py/core/providers/prompts/defaults) directory. 
These default prompts provide a starting point for various tasks within the R2R system. - -For example, the default RAG (Retrieval-Augmented Generation) prompt is defined as follows: - -```yaml -default_rag: - template: > - ## Task: - - Answer the query given immediately below given the context which follows later. Use line item references to like [1], [2], ... refer to specifically numbered items in the provided context. Pay close attention to the title of each given source to ensure it is consistent with the query. - - - ### Query: - - {query} - - - ### Context: - - {context} - - - ### Query: - - {query} - - - REMINDER - Use line item references to like [1], [2], ... refer to specifically numbered items in the provided context. - - ## Response: - input_types: - query: str - context: str -``` - -### Default Prompt Usage - -Here's a table showing the purpose of key default prompts, with links to their respective files on GitHub: - -| Prompt File | Purpose | -|-------------|---------| -| [`default_rag.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/default_rag.yaml) | Default prompt for Retrieval-Augmented Generation (RAG) tasks. It instructs the model to answer queries based on provided context, using line item references. | -| [`graphrag_community_reports.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/graphrag_community_reports.yaml) | Used in GraphRAG to generate reports about communities or clusters in the knowledge graph. | -| [`graphrag_entity_description.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/graphrag_entity_description.yaml) | Used in GraphRAG to generate descriptions of individual entities in the knowledge graph. | -| [`graphrag_map_system.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/graphrag_map_system.yaml) | System prompt for the "map" phase in GraphRAG, used to process individual nodes or edges. | -| [`graphrag_reduce_system.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/graphrag_reduce_system.yaml) | System prompt for the "reduce" phase in GraphRAG, used to combine or summarize information from multiple sources. | -| [`graphrag_triples_extraction_few_shot.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/graphrag_triples_extraction_few_shot.yaml) | Few-shot prompt for extracting subject-predicate-object triplets in GraphRAG, with examples. | -| [`hyde.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/hyde.yaml) | Related to Hypothetical Document Embeddings (HyDE) for improving retrieval performance. | -| [`kg_search.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/kg_search.yaml) | Used for searching the knowledge graph, possibly to formulate queries or interpret results. | -| [`kg_search_with_spec.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/kg_search_with_spec.yaml) | Similar to `kg_search.yaml`, but with a specific schema or specification for the search process. | -| [`rag_agent.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/rag_agent.yaml) | Defines the behavior and instructions for the RAG agent, which coordinates the retrieval and generation process. 
| -| [`rag_context.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/rag_context.yaml) | Used to process or format the context retrieved for RAG tasks. | -| [`rag_fusion.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/rag_fusion.yaml) | Used in RAG fusion techniques, possibly for combining information from multiple retrieved passages. | -| [`system.yaml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/providers/database/prompts/system.yaml) | Contains general system-level prompts or instructions for the R2R system. | - - -You can find the full list of default prompts and their contents in the [defaults directory](https://github.com/SciPhi-AI/R2R/tree/main/py/core/providers/prompts/defaults). - -## Prompt Provider - -R2R uses a postgres class to manage prompts. This allows for storage, retrieval, and manipulation of prompts, leveraging both a PostgreSQL database and YAML files for flexibility and persistence. - -Key features of prompts inside R2R: - -1. **Database Storage**: Prompts are stored in a PostgreSQL table, allowing for efficient querying and updates. -2. **YAML File Support**: Prompts can be loaded from YAML files, providing an easy way to version control and distribute default prompts. -3. **In-Memory Cache**: Prompts are kept in memory for fast access during runtime. - -## Prompt Structure - -Each prompt in R2R consists of: - -- **Name**: A unique identifier for the prompt. -- **Template**: The actual text of the prompt, which may include placeholders for dynamic content. -- **Input Types**: A dictionary specifying the expected types for any dynamic inputs to the prompt. - -## Managing Prompts - -R2R provides several endpoints and SDK methods for managing prompts: - -### Adding a Prompt - -To add a new prompt: - -```python -from r2r import R2RClient - -client = R2RClient() - -response = client.add_prompt( - name="my_new_prompt", - template="Hello, {name}! Welcome to {service}.", - input_types={"name": "str", "service": "str"} -) -``` - -### Updating a Prompt - -To update an existing prompt: - -```python -response = client.update_prompt( - name="my_existing_prompt", - template="Updated template: {variable}", - input_types={"variable": "str"} -) -``` - -### Retrieving a Prompt - -To get a specific prompt: - -```python -response = client.get_prompt( - prompt_name="my_prompt", - inputs={"variable": "example"}, - prompt_override="Optional override text" -) -``` - -### Listing All Prompts - -To retrieve all prompts: - -```python -response = client.get_all_prompts() -``` - -### Deleting a Prompt - -To delete a prompt: - -```python -response = client.delete_prompt("prompt_to_delete") -``` - -## Security Considerations - -Access to prompt management functions is restricted to superusers to prevent unauthorized modifications to system prompts. Ensure that only trusted administrators have superuser access to your R2R deployment. - -## Best Practices - -1. **Version Control**: Store your prompts in version-controlled YAML files for easy tracking of changes and rollbacks. -2. **Consistent Naming**: Use a consistent naming convention for your prompts to make them easy to identify and manage. -3. **Input Validation**: Always specify input types for your prompts to ensure that they receive the correct data types. -4. **Regular Audits**: Periodically review and update your prompts to ensure they remain relevant and effective. -5. 
**Testing**: Test prompts thoroughly before deploying them to production, especially if they involve complex logic or multiple input variables. - -## Advanced Usage - -### Dynamic Prompt Loading - -R2R's prompt system allows for dynamic loading of prompts from both the database and YAML files. This enables you to: - -1. Deploy default prompts with your application. -2. Override or extend these prompts at runtime. -3. Easily update prompts without redeploying your entire application. - -### Prompt Templating - -The prompt template system in R2R supports complex string formatting. You can include conditional logic, loops, and other Python expressions within your prompts using a templating engine. - -Example of a more complex prompt template: - -```python -complex_template = """ -Given the following information: -{% for item in data %} -- {{ item.name }}: {{ item.value }} -{% endfor %} - -Please provide a summary that {% if include_analysis %}includes an analysis of the data{% else %}lists the key points{% endif %}. -""" - -client.add_prompt( - name="complex_summary", - template=complex_template, - input_types={"data": "list", "include_analysis": "bool"} -) -``` - -This flexibility allows you to create highly dynamic and context-aware prompts that can adapt to various scenarios in your application. - -## Conclusion - -R2R's prompt management system provides a powerful and flexible way to control the behavior of language models in your application. By leveraging this system effectively, you can create more dynamic, context-aware, and maintainable AI-powered features. - -For more detailed information on other aspects of R2R configuration, please refer to the following pages: - -- [LLM Configuration](/documentation/configuration/llm) -- [RAG Configuration](/documentation/configuration/rag) -- [Postgres Configuration](/documentation/configuration/postgres) -- [Ingestion Configuration](/documentation/configuration/ingestion/overview) diff --git a/docs/documentation/configuration/rag.mdx b/docs/documentation/configuration/rag.mdx deleted file mode 100644 index 2707397ad..000000000 --- a/docs/documentation/configuration/rag.mdx +++ /dev/null @@ -1,137 +0,0 @@ ---- -title: 'RAG' -description: 'Learn how to configure RAG in your R2R deployment' -icon: 'brain' ---- - -## RAG Customization - -RAG (Retrieval-Augmented Generation) in R2R can be extensively customized to suit various use cases. The main components for customization are: - -1. **Generation Configuration**: Control the language model's behavior. -2. **Search Settings**: Fine-tune the retrieval process. -3. **Task Prompt Override**: Customize the system prompt for specific tasks. - - -### LLM Provider Configuration - -Refer to the LLM configuration [page here](/documentation/configuration/llm). - - -### Retrieval Configuration - -Refer to the retrieval configuration [page here](/documentation/configuration/retrieval/overview). - - -### Combining LLM and Retrieval Configuration for RAG - - - -The `rag_generation_config` parameter allows you to customize the language model's behavior. Default settings are set on the server-side using the `r2r.toml`, as described in in previous configuraiton guides. 
These settings can be overridden at runtime as shown below: - -```python -# Configure vector search -vector_search_settings = { - "use_vector_search": True, - "search_limit": 20, - "use_hybrid_search": True, - "selected_collection_ids": ["c3291abf-8a4e-5d9d-80fd-232ef6fd8526"] -} - -# Configure graphRAG search -kg_search_settings = { - "use_kg_search": True, - "kg_search_type": "local", - "kg_search_level": None, - "generation_config": { - "model": "gpt-4", - "temperature": 0.1 - }, - "entity_types": ["Person", "Organization"], - "relationships": ["worksFor", "foundedBy"], - "max_community_description_length": 65536, - "max_llm_queries_for_global_search": 250, - "local_search_limits": {"__Entity__": 20, "__Relationship__": 20, "__Community__": 20} -} - -# Configure LLM generation -rag_generation_config = { - "model": "anthropic/claude-3-opus-20240229", - "temperature": 0.7, - "top_p": 0.95, - "max_tokens_to_sample": 1500, - "stream": True, - "functions": None, # For function calling, if supported - "tools": None, # For tool use, if supported - "add_generation_kwargs": {}, # Additional provider-specific parameters - "api_base": None # Custom API endpoint, if needed -} -``` - -When performing a RAG query, you can combine these vector search, knowledge graph search, and generation settings at runtime: - -```python -from r2r import R2RClient - -client = R2RClient() - -response = client.rag( - "What are the latest advancements in quantum computing?", - rag_generation_config=rag_generation_config, - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, -) -``` - -R2R defaults to the specified server-side settings when no runtime overrides are specified. - -### RAG Prompt Override - -For specialized tasks, you can override the default RAG task prompt at runtime: - -```python -task_prompt_override = """You are an AI assistant specializing in quantum computing. -Your task is to provide a concise summary of the latest advancements in the field, -focusing on practical applications and breakthroughs from the past year.""" - -response = client.rag( - "What are the latest advancements in quantum computing?", - rag_generation_config=rag_generation_config, - task_prompt_override=task_prompt_override -) -``` - -This prompt can also be set statically as part of the server configuration process. - -## Agent-based Interaction - -R2R supports multi-turn conversations and complex query processing through its agent endpoint: - -```python -messages = [ - {"role": "system", "content": "You are a helpful AI assistant."}, - {"role": "user", "content": "What are the key differences between quantum and classical computing?"} -] - -response = client.agent( - messages=messages, - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, - rag_generation_config=rag_generation_config, -) -``` - -The agent can break down complex queries into sub-tasks, leveraging both retrieval and generation capabilities to provide comprehensive responses. The settings specified in the example above will propagate to the agent and its tools. - -By leveraging these configuration options, you can fine-tune R2R's retrieval and generation process to best suit your specific use case and requirements. 
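A practical note on the agent endpoint above: each call receives the full conversation history, so a follow-up turn simply re-sends the prior messages with the new user message appended. The sketch below illustrates this; how you extract the assistant's previous reply from `response` is an assumption that depends on your SDK version.

```python
# Append the assistant's previous reply (extract it from `response` according
# to your SDK's response shape) plus the user's next question, then call again.
assistant_reply = "..."  # placeholder for the text of the agent's last answer

messages += [
    {"role": "assistant", "content": assistant_reply},
    {"role": "user", "content": "How do their error-correction requirements differ?"},
]

followup = client.agent(
    messages=messages,
    vector_search_settings=vector_search_settings,
    kg_search_settings=kg_search_settings,
    rag_generation_config=rag_generation_config,
)
```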
- - - -## Next Steps - -For more detailed information on configuring specific components of R2R, please refer to the following pages: - -- [Postgres Configuration](/documentation/configuration/postgres) -- [LLM Configuration](/documentation/configuration/llm) -- [Ingestion Configuration](/documentation/configuration/ingestion/overview) -- [Knowledge Graph Configuration](/documentation/configuration/knowledge-graph/overview) -- [Retrieval Configuration](/documentation/configuration/retrieval/overview) diff --git a/docs/documentation/configuration/retrieval/knowledge-graph.mdx b/docs/documentation/configuration/retrieval/knowledge-graph.mdx deleted file mode 100644 index 31665f885..000000000 --- a/docs/documentation/configuration/retrieval/knowledge-graph.mdx +++ /dev/null @@ -1,38 +0,0 @@ ---- -title: 'Knowledge Graph' ---- -Knowledge graph search settings can be configured both server-side and at runtime. Runtime settings are passed as a dictionary to the search and RAG endpoints. You may refer to the [search API documentation here](/api-reference/endpoint/search) for additional materials. - - -```python -kg_search_settings = { - "use_kg_search": True, - "kg_search_type": "local", - "kg_search_level": None, - "generation_config": { - "model": "gpt-4", - "temperature": 0.1 - }, - "entity_types": ["Person", "Organization"], - "relationships": ["worksFor", "foundedBy"], - "max_community_description_length": 65536, - "max_llm_queries_for_global_search": 250, - "local_search_limits": {"__Entity__": 20, "__Relationship__": 20, "__Community__": 20} -} - -response = client.search("query", kg_search_settings=kg_search_settings) -``` - -**KGSearchSettings** - -1. `use_kg_search` (bool): Whether to use knowledge graph search -2. `kg_search_type` (str): Type of knowledge graph search ('global' or 'local') -3. `kg_search_level` (Optional[str]): Level of knowledge graph search -4. `generation_config` (Optional[GenerationConfig]): Configuration for knowledge graph search generation -5. `entity_types` (list): Types of entities to search for -6. `relationships` (list): Types of relationships to search for -7. `max_community_description_length` (int): Maximum length of community descriptions (default: 65536) -8. `max_llm_queries_for_global_search` (int): Maximum number of LLM queries for global search (default: 250) -9. `local_search_limits` (dict[str, int]): Limits for local search results by type - -These settings provide fine-grained control over the search process in R2R, including vector search, hybrid search, and knowledge graph search configurations. diff --git a/docs/documentation/configuration/retrieval/overview.mdx b/docs/documentation/configuration/retrieval/overview.mdx deleted file mode 100644 index d93934510..000000000 --- a/docs/documentation/configuration/retrieval/overview.mdx +++ /dev/null @@ -1,113 +0,0 @@ ---- -title: 'Overview' -description: 'Configure your R2R retrieval pipeline' ---- - -## Introduction - -Retrieval in R2R is a sophisticated system that leverages ingested data to provide powerful search and Retrieval-Augmented Generation (RAG) capabilities. It combines vector-based semantic search, knowledge graph querying, and language model generation to deliver accurate and contextually relevant results. 
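As a quick orientation, the sketch below calls the two retrieval entry points described above through the Python SDK. It assumes a locally running R2R server with documents already ingested and default settings in place.

```python
from r2r import R2RClient

client = R2RClient()  # assumes a local R2R deployment with default settings

# Vector / hybrid search over previously ingested documents
search_results = client.search("How does hybrid search combine full-text and semantic results?")

# Retrieval-Augmented Generation over the same corpus
rag_response = client.rag("Summarize how hybrid search works in R2R.")
```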
- -## Key Configuration Areas - -To configure the retrieval system in R2R, you'll need to focus on several areas in your `r2r.toml` file: - -```toml -[database] -provider = "postgres" - -[embedding] -provider = "litellm" -base_model = "openai/text-embedding-3-small" -base_dimension = 512 -batch_size = 128 -add_title_as_prefix = false -rerank_model = "None" -concurrent_request_limit = 256 - -[database] -provider = "postgres" -batch_size = 256 - -[completion] -provider = "litellm" -concurrent_request_limit = 16 - -[completion.generation_config] -model = "openai/gpt-4" -temperature = 0.1 -top_p = 1 -max_tokens_to_sample = 1_024 -stream = false -``` - -These settings directly impact how R2R performs retrieval operations: - -- The `[database]` section configures the vector database used for semantic search and document management. -- The `[embedding]` section defines the model and parameters for converting text into vector embeddings. -- The `[database]` section, when configured, enables knowledge graph-based retrieval. -- The `[completion]` section sets up the language model used for generating responses in the RAG pipeline. - -## Customization and Advanced Features - -R2R's retrieval system is highly customizable, allowing you to: - -- Implement hybrid search combining vector-based and knowledge graph queries -- Customize search filters, limits, and query generation -- Add custom pipes to the search and RAG pipelines -- Implement reranking for improved result relevance - -### Structured Outputs - -R2R supports structured outputs for RAG responses, allowing you to define specific response formats using Pydantic models. This ensures consistent, type-safe responses that can be easily validated and processed programmatically. - -Some models may require the word 'JSON' to appear in their prompt for structured outputs to work. Be sure to update your prompt to reflect this, if necessary. 
- -Here's a simple example of using structured outputs with Pydantic models: - -```python -from r2r import R2RClient, GenerationConfig -from pydantic import BaseModel - -# Initialize the client -client = R2RClient() - -# Define your response structure -class ResponseModel(BaseModel): - answer: str - sources: list[str] - -# Make a RAG query with structured output -response = client.rag( - query="…", - rag_generation_config=GenerationConfig( - response_format=ResponseModel - ) -) -``` - -## Pipeline Architecture - -Retrieval in R2R is implemented as a pipeline and consists of the main components shown below: - -```mermaid -graph TD - A[User Query] --> B[RAG Pipeline] - B --> C[Search Pipeline] - B --> D[RAG Generation Pipeline] - C --> E[Vector Search] - C --> F[Knowledge Graph Search] - E --> G[Search Results] - F --> G - G --> D - D --> H[Generated Response] -``` - - -## Next Steps - -For more detailed information on configuring specific components of the ingestion pipeline, please refer to the following pages: - -- [Ingestion Configuration](/documentation/configuration/ingestion/overview) -- [Vector Search Configuration](/documentation/configuration/retrieval/vector-search) -- [Knowledge Graph Search Configuration](/documentation/configuration/retrieval/knowledge-graph) -- [Retrieval Configuration](/documentation/configuration/retrieval/overview) diff --git a/docs/documentation/configuration/retrieval/vector-search.mdx b/docs/documentation/configuration/retrieval/vector-search.mdx deleted file mode 100644 index cd633e471..000000000 --- a/docs/documentation/configuration/retrieval/vector-search.mdx +++ /dev/null @@ -1,65 +0,0 @@ ---- -title: 'Vector Search' ---- - -Vector search settings can be configured both server-side and at runtime. Runtime settings are passed as a dictionary to the search and RAG endpoints. You may refer to the [search API documentation here](/api-reference/endpoint/search) for additional materials. - - -Example using the Python SDK: - -```python -vector_search_settings = { - "use_vector_search": True, - "search_filters": {"document_type": {"$eq": "article"}}, - "search_limit": 20, - "use_hybrid_search": True, - "selected_collection_ids": ["c3291abf-8a4e-5d9d-80fd-232ef6fd8526"] -} - -response = client.search("query", vector_search_settings=vector_search_settings) -``` - -#### Configurable Parameters - -**VectorSearchSettings** - -1. `use_vector_search` (bool): Whether to use vector search -2. `use_hybrid_search` (bool): Whether to perform a hybrid search (combining vector and keyword search) -3. `filters` (dict): Alias for filters -3. `search_filters` (dict): Filters to apply to the vector search -4. `search_limit` (int): Maximum number of results to return (1-1000) -5. `selected_collection_ids` (list[UUID]): Collection Ids to search for -6. `index_measure` (IndexMeasure): The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product) -7. `include_values` (bool): Whether to include search score values in the search results -8. `include_metadatas` (bool): Whether to include element metadata in the search results -9. `probes` (Optional[int]): Number of ivfflat index lists to query (default: 10) -10. `ef_search` (Optional[int]): Size of the dynamic candidate list for HNSW index search (default: 40) -11. `hybrid_search_settings` (Optional[HybridSearchSettings]): Settings for hybrid search - -**HybridSearchSettings** - -1. `full_text_weight` (float): Weight to apply to full text search (default: 1.0) -2. 
`semantic_weight` (float): Weight to apply to semantic search (default: 5.0) -3. `full_text_limit` (int): Maximum number of results to return from full text search (default: 200) -4. `rrf_k` (int): K-value for RRF (Rank Reciprocal Fusion) (default: 50) - -#### Advanced Filtering - -R2R supports complex filtering using PostgreSQL-based queries. Allowed operators include: -- `eq`, `neq`: Equality and inequality -- `gt`, `gte`, `lt`, `lte`: Greater than, greater than or equal, less than, less than or equal -- `like`, `ilike`: Pattern matching (case-sensitive and case-insensitive) -- `in`, `nin`: Inclusion and exclusion in a list of values - -Example of advanced filtering: - -```python -filters = { - "$and": [ - {"publication_date": {"$gte": "2023-01-01"}}, - {"author": {"$in": ["John Doe", "Jane Smith"]}}, - {"category": {"$ilike": "%technology%"}} - ] -} -vector_search_settings["filters"] = filters -``` diff --git a/docs/documentation/deep-dive/main/builder.mdx b/docs/documentation/deep-dive/main/builder.mdx deleted file mode 100644 index 4f21d222a..000000000 --- a/docs/documentation/deep-dive/main/builder.mdx +++ /dev/null @@ -1,202 +0,0 @@ ---- -title: 'R2RBuilder' -description: 'Learn how to build and customize R2R' -# icon: 'wrench' ---- - -## Introduction - -R2R uses two key components for assembling and customizing applications: the `R2RBuilder` and a set of factory classes, `R2RFactory*`. These components work together to provide a flexible and intuitive way to construct R2R instances with custom configurations, providers, pipes, and pipelines. - -## R2RBuilder - -The `R2RBuilder` is the central component for assembling R2R applications. It employs the Builder pattern for simple application customization. - -### Key Features - -1. **Flexible Configuration**: Supports loading configurations from TOML files or using predefined configurations. -2. **Component Overrides**: Allows overriding default providers, pipes, and pipelines with custom implementations. -3. **Factory Customization**: Supports custom factory implementations for providers, pipes, and pipelines. -4. **Fluent Interface**: Provides a chainable API for easy and readable setup. - -### Basic Usage - -Here's a simple example of how to use the R2RBuilder: - -```python -from r2r import R2RBuilder, R2RConfig - -# Create an R2R instance with default configuration -r2r = R2RBuilder().build() - -# Create an R2R instance with a custom configuration file -r2r = R2RBuilder(config=R2RConfig.from_toml("path/to/config.toml")).build() - -# Create an R2R instance with a predefined configuration -r2r = R2RBuilder(config_name="full").build() -``` - -## Factories - -R2R uses a set of factory classes to create various components of the system. These factories allow for easy customization and extension of R2R's functionality. - -### Main Factory Classes - -1. **R2RProviderFactory**: Creates provider instances (e.g., DatabaseProvider, EmbeddingProvider). -2. **R2RPipeFactory**: Creates individual pipes used in pipelines. -3. **R2RPipelineFactory**: Creates complete pipelines by assembling pipes. - -### Factory Methods - -Each factory class contains methods for creating specific components. For example, the `R2RPipeFactory` includes methods like: - -- `create_parsing_pipe()` -- `create_embedding_pipe()` -- `create_vector_search_pipe()` -- `create_rag_pipe()` - -### Customizing Factories - -You can customize the behavior of R2R by extending these factory classes. 
Here's a simplified example of extending the `R2RPipeFactory`: - -```python -from r2r import R2RPipeFactory -from r2r.pipes import MultiSearchPipe, QueryTransformPipe - -class R2RPipeFactoryWithMultiSearch(R2RPipeFactory): - def create_vector_search_pipe(self, *args, **kwargs): - # Create a custom multi-search pipe - query_transform_pipe = QueryTransformPipe( - llm_provider=self.providers.llm, - config=QueryTransformPipe.QueryTransformConfig( - name="multi_search", - task_prompt="multi_search_task_prompt", - ), - ) - - inner_search_pipe = super().create_vector_search_pipe(*args, **kwargs) - - return MultiSearchPipe( - query_transform_pipe=query_transform_pipe, - inner_search_pipe=inner_search_pipe, - config=MultiSearchPipe.PipeConfig(), - ) -``` - - -## Builder + Factory in action - -The R2RBuilder provides methods to override various components of the R2R system: - -### Provider Overrides - -```python -from r2r.providers import CustomAuthProvider, CustomDatabaseProvider - -builder = R2RBuilder() -builder.with_auth_provider(CustomAuthProvider()) -builder.with_database_provider(CustomDatabaseProvider()) -r2r = builder.build() -``` -Available provider override methods: -- `with_auth_provider` -- `with_database_provider` -- `with_embedding_provider` -- `with_eval_provider` -- `with_llm_provider` -- `with_crypto_provider` - - -### Pipe Overrides - -```python -from r2r.pipes import CustomParsingPipe, CustomEmbeddingPipe - -builder = R2RBuilder() -builder.with_parsing_pipe(CustomParsingPipe()) -builder.with_embedding_pipe(CustomEmbeddingPipe()) -r2r = builder.build() -``` - -Available pipe override methods: -- `with_parsing_pipe` -- `with_embedding_pipe` -- `with_vector_storage_pipe` -- `with_vector_search_pipe` -- `with_rag_pipe` -- `with_streaming_rag_pipe` -- `with_eval_pipe` -- `with_kg_pipe` -- `with_kg_storage_pipe` -- `with_kg_search_pipe` - - -### Pipeline Overrides - -```python -from r2r.pipelines import CustomIngestionPipeline, CustomSearchPipeline - -builder = R2RBuilder() -builder.with_ingestion_pipeline(CustomIngestionPipeline()) -builder.with_search_pipeline(CustomSearchPipeline()) -r2r = builder.build() -``` -Available pipeline override methods: -- `with_ingestion_pipeline` -- `with_search_pipeline` -- `with_rag_pipeline` -- `with_streaming_rag_pipeline` -- `with_eval_pipeline` - -### Factory Overrides - -```python -from r2r.factory import CustomProviderFactory, CustomPipeFactory, CustomPipelineFactory - -builder = R2RBuilder() -builder.with_provider_factory(CustomProviderFactory) -builder.with_pipe_factory(CustomPipeFactory) -builder.with_pipeline_factory(CustomPipelineFactory) -r2r = builder.build() -``` - - -## Advanced Usage - -For more complex scenarios, you can chain multiple customizations: - -```python -from r2r import R2RBuilder, R2RConfig -from r2r.pipes import CustomRAGPipe - -# Placeholder custom components; real implementations would subclass the -# relevant R2R base classes. -class MyCustomAuthProvider: - ... - -class MyCustomLLMProvider: - ... - -class MyCustomRAGPipe: - ... - -config = R2RConfig.from_toml("path/to/config.toml") - -r2r = ( - R2RBuilder(config=config) - .with_auth_provider(MyCustomAuthProvider()) - .with_llm_provider(MyCustomLLMProvider()) - .with_rag_pipe(MyCustomRAGPipe()) - .build() -) -``` - -This approach allows you to create highly customized R2R instances tailored to your specific needs. - -## Best Practices - -1. **Configuration Management**: Use separate configuration files for different environments (development, staging, production). -2. 
**Custom Components**: When creating custom providers, pipes, or pipelines, ensure they adhere to the respective interfaces defined in the R2R framework. -3. **Testability**: Create factory methods or builder configurations specifically for testing to easily mock or stub components. -4. **Logging**: Enable appropriate logging in your custom components to aid in debugging and monitoring. -5. **Error Handling**: Implement proper error handling in custom components and provide meaningful error messages. - -By leveraging the R2RBuilder and customizing factories, you can create flexible, customized, and powerful R2R applications tailored to your specific use cases and requirements. diff --git a/docs/documentation/deep-dive/main/config.mdx b/docs/documentation/deep-dive/main/config.mdx deleted file mode 100644 index 69e6a0d67..000000000 --- a/docs/documentation/deep-dive/main/config.mdx +++ /dev/null @@ -1,224 +0,0 @@ ---- -title: 'R2RConfig' -introduction: 'Learn how to configure your R2R application' ---- - -## Introduction - -`R2RConfig` uses a TOML-based configuration system to customize various aspects of R2R's functionality. This guide provides a detailed overview of how to configure R2R, including all available options and their meanings. - -## Configuration File Structure - -The R2R configuration is stored in a TOML file, which defaults to [`r2r.toml`](https://github.com/SciPhi-AI/R2R/blob/main/r2r.toml). The file is divided into several sections, each corresponding to a different aspect of the R2R system: - -- Authentication -- Completion (LLM) -- Cryptography -- Database -- Embedding -- Evaluation -- Ingestion -- Knowledge Graph -- Logging -- Prompt Management - -## Loading a Configuration - -To use a custom configuration, you can load it when initializing R2R: - -```python -from r2r import R2RConfig, R2RBuilder - -# Load a custom configuration -config = R2RConfig.from_toml("path/to/your/r2r.toml") -r2r = R2RBuilder(config).build() - -# Or use the preset configuration -r2r = R2RBuilder().build() -``` - -## Configuration Sections - -### Authentication -Refer to the [`AuthProvider`](/documentation/deep-dive/providers/auth) to learn more about how R2R supports auth providers. - -```toml -[auth] -provider = "r2r" -access_token_lifetime_in_minutes = 60 -refresh_token_lifetime_in_days = 7 -require_authentication = false -require_email_verification = false -default_admin_email = "admin@example.com" -default_admin_password = "change_me_immediately" -``` - -- `provider`: Authentication provider. Currently, only "r2r" is supported. -- `access_token_lifetime_in_minutes`: Lifespan of access tokens in minutes. -- `refresh_token_lifetime_in_days`: Lifespan of refresh tokens in days. -- `require_authentication`: If true, all secure routes require authentication. Otherwise, non-authenticated requests mock superuser access. -- `require_email_verification`: If true, email verification is required for new accounts. -- `default_admin_email` and `default_admin_password`: Credentials for the default admin account. - -### Completion (LLM) - -Refer to the [`LLMProvider`](/documentation/deep-dive/providers/llms) to learn more about how R2R supports LLM providers. -```toml - -[completion] -provider = "litellm" -concurrent_request_limit = 16 - - [completion.generation_config] - model = "openai/gpt-4o" - temperature = 0.1 - top_p = 1 - max_tokens_to_sample = 1_024 - stream = false - add_generation_kwargs = { } -``` - -- `provider`: LLM provider. Options include "litellm" and "openai". 
-- `concurrent_request_limit`: Maximum number of concurrent requests allowed. -- `generation_config`: Detailed configuration for text generation. - - `model`: The specific LLM model to use. - - `temperature`: Controls randomness in generation (0.0 to 1.0). - - `top_p`: Parameter for nucleus sampling. - - `max_tokens_to_sample`: Maximum number of tokens to generate. - - Other parameters control various aspects of text generation. - -### Cryptography - -Refer to the [`CryptoProvider`](/documentation/deep-dive/providers/auth) to learn more about how R2R supports cryptography. - -```toml -[crypto] -provider = "bcrypt" -``` - -- `provider`: Cryptography provider for password hashing. Currently, only "bcrypt" is supported. - -### Database - -Refer to the [`DatabaseProvider`](/documentation/deep-dive/providers/database) to learn more about how R2R supports databases. - -```toml -[database] -provider = "postgres" -``` - -- `provider`: Database provider. Only "postgres" is supported. -- `user`: Default username for accessing database. -- `password`: Default password for accessing database. -- `host`: Default host for accessing database. -- `port`: Default port for accessing database. -- `db_name`: Default db_name for accessing database. - -### Embedding - -Refer to the [`EmbeddingProvider`](/documentation/deep-dive/providers/embeddings) to learn more about how R2R supports embeddings. - -```toml -[embedding] -provider = "litellm" -base_model = "text-embedding-3-small" -base_dimension = 512 -batch_size = 128 -add_title_as_prefix = false -rerank_model = "None" -concurrent_request_limit = 256 -``` - -- `provider`: Embedding provider. Options include "ollama", "openai" and "sentence-transformers". -- `base_model`: The specific embedding model to use. -- `base_dimension`: Dimension of the embedding vectors. -- `batch_size`: Number of items to process in a single batch. -- `add_title_as_prefix`: Whether to add the title as a prefix to the embedded text. -- `rerank_model`: Model used for reranking, if any. -- `concurrent_request_limit`: Maximum number of concurrent embedding requests. - -### Evaluation - -```toml -[eval] -provider = "None" -``` - -- `provider`: Evaluation provider. Set to "None" to disable evaluation functionality. - -### Knowledge Graph - -Refer to the [`KGProvider`](documentation/deep-dive/providers/knowledge-graph) to learn more about how R2R supports knowledge graphs. - -```toml -[database] -provider = "postgres" -batch_size = 1 - -[database.kg_extraction_config] -model = "gpt-4o" -temperature = 0.1 -top_p = 1 -max_tokens_to_sample = 1_024 -stream = false -add_generation_kwargs = { } -``` - -- `provider`: Specifies the backend used for storing and querying the knowledge graph. Options include "postgres" and "None". -- `batch_size`: Determines how many text chunks are processed at once for knowledge extraction. -- `kg_extraction_config`: Configures the language model used for extracting knowledge from text chunks. - -### Logging - -```toml -[logging] -provider = "local" -log_table = "logs" -log_info_table = "log_info" -``` - -- `provider`: Logging provider. Currently set to "local". -- `log_table`: Name of the table where logs are stored. -- `log_info_table`: Name of the table where log information is stored. - -### Prompt Management - -```toml -[prompt] -provider = "r2r" -``` - -- `provider`: Prompt management provider. Currently set to "r2r". 
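Once a configuration containing these sections is loaded, the values are exposed as attributes on the `R2RConfig` object. The sketch below illustrates this pattern; the exact attribute names are assumed to mirror the TOML section and key names, so verify them against your R2R version.

```python
from r2r import R2RConfig

config = R2RConfig.from_toml("path/to/your/r2r.toml")

# Attribute access is assumed to follow the section/key names in the TOML file.
print(config.completion.provider)    # e.g. "litellm"
print(config.embedding.base_model)   # e.g. "text-embedding-3-small"
print(config.database.provider)      # "postgres"
```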
- -## Advanced Configuration - -### Environment Variables - -For sensitive information like API keys, it's recommended to use environment variables instead of hardcoding them in the configuration file. R2R will automatically look for environment variables for certain settings. - -### Custom Providers - -R2R supports custom providers for various components. To use a custom provider, you'll need to implement the appropriate interface and register it with R2R. Refer to the developer documentation for more details on creating custom providers. - -### Configuration Validation - -R2R performs validation on the configuration when it's loaded. If there are any missing required fields or invalid values, an error will be raised. Always test your configuration in a non-production environment before deploying. - -## Best Practices - -1. **Security**: Never commit sensitive information like API keys or passwords to version control. Use environment variables instead. -2. **Modularity**: Create separate configuration files for different environments (development, staging, production). -3. **Documentation**: Keep your configuration files well-commented, especially when using custom or non-standard settings. -4. **Version Control**: Track your configuration files in version control, but use `.gitignore` to exclude files with sensitive information. -5. **Regular Review**: Periodically review and update your configuration to ensure it aligns with your current needs and best practices. - -## Troubleshooting - -If you encounter issues with your configuration: - -1. Check the R2R logs for any error messages related to configuration. -2. Verify that all required fields are present in your configuration file. -3. Ensure that the values in your configuration are of the correct type (string, number, boolean, etc.). -4. If using custom providers or non-standard settings, double-check the documentation or consult with the R2R community. - -By following this guide, you should be able to configure R2R to suit your specific needs. Remember that R2R is highly customizable, so don't hesitate to explore different configuration options to optimize your setup. diff --git a/docs/documentation/deep-dive/main/introduction.mdx b/docs/documentation/deep-dive/main/introduction.mdx deleted file mode 100644 index 23dcab8cd..000000000 --- a/docs/documentation/deep-dive/main/introduction.mdx +++ /dev/null @@ -1,101 +0,0 @@ ---- -title: 'Core Logic' -introduction: 'Learn about the main R2R application logic' ---- - -## Introduction - -R2R is a an engine for building user-facing Retrieval-Augmented Generation (RAG) applications. At its core, R2R uses several main components to create and manage these applications: `R2RConfig`, `R2RBuilder`, `R2REngine`, `R2RApp`, and `R2R`. In this section we will explore all of these components in detail. - -## Assembly Process - -The following diagram illustrates how R2R assembles a user-facing application: - -```mermaid -flowchart TD - subgraph "Assembly" - direction TB - Config[R2RConfig] - Builder[R2RBuilder] - - subgraph Factories - PF[R2RProviderFactory] - PiF[R2RPipeFactory] - PlF[R2RPipelineFactory] - end - - Engine[Orchestration] - API[API] - - Config --> Builder - Builder --> PF - Builder --> PiF - Builder --> PlF - PF --> Engine - PiF --> Engine - PlF --> Engine - Engine --> API - end - - User[Developer] --> |Customizes| Config - User --> |Uses| Builder - User --> |Overrides| Factories -``` - -## R2RConfig - -R2RConfig is the configuration management class for R2R applications. 
It serves as the central point for defining and managing all settings required by various components of the R2R system. - -Key features of R2RConfig: - -1. **Configurable**: R2RConfig loads its settings from a TOML file, making it easy to modify and version control. -2. **Hierarchical Structure**: The configuration is organized into sections, each corresponding to a specific aspect of the R2R system (e.g., embedding, database, logging). -3. **Default Values**: R2RConfig provides a set of default values, which can be overridden as needed. -4. **Validation**: The class includes methods to validate the configuration, ensuring all required settings are present. -5. **Type-safe Access**: Once loaded, the configuration provides type-safe access to settings, reducing the risk of runtime errors. - -Example usage: -```python -config = R2RConfig.from_toml("path/to/config.toml") -embedding_model = config.embedding.base_model -max_file_size = config.app.max_file_size_in_mb -``` - -## R2RBuilder - -R2RBuilder is the central component for assembling R2R applications, using the builder pattern. It provides a flexible way to construct and customize the R2R system. - -Key features of R2RBuilder: - -1. **Fluent Interface**: Allows chaining of configuration methods for easy setup. -2. **Component Overrides**: Provides methods to override default providers, pipes, and pipelines. -3. **Factory Customization**: Supports custom factory implementations for providers, pipes, and pipelines. -4. **Configuration Integration**: Uses R2RConfig to set up the system according to specified settings. - -Example usage: -```python -builder = R2RBuilder(config=config) -builder.with_embedding_provider(custom_embedding_provider) -builder.with_llm_provider(custom_llm_provider) -r2r = builder.build() -``` - -## R2RApp - -R2RApp is the class responsible for setting up the FastAPI application that serves as the interface for the R2R system. It encapsulates the routing and CORS configuration for the R2R API. - -Key features of R2RApp: - -1. **FastAPI Integration**: Creates and configures a FastAPI application, setting up all necessary routes. -2. **Route Setup**: Automatically sets up routes for ingestion, management, retrieval, and authentication based on the provided R2REngine. -3. **CORS Configuration**: Applies CORS (Cross-Origin Resource Sharing) settings to the FastAPI application. -4. **Serving Capability**: Includes a `serve` method to easily start the FastAPI server. - -Example usage: -```python -app = r2r.app -app.serve(host="0.0.0.0", port=7272) -``` - - -By combining these core components, R2R provides a flexible and powerful system for building and deploying RAG applications. The R2RConfig allows for easy customization while the R2RBuilder facilitates the assembly process. Lastly, the R2RApp offers a robust API interface. diff --git a/docs/documentation/deep-dive/other/telemetry.mdx b/docs/documentation/deep-dive/other/telemetry.mdx deleted file mode 100644 index 734d26db8..000000000 --- a/docs/documentation/deep-dive/other/telemetry.mdx +++ /dev/null @@ -1,53 +0,0 @@ ---- -title: 'Telemetry' -description: 'Learn about R2R telemetry and how to manage it' ---- - -R2R uses telemetry to collect **anonymous** usage information. This data helps us understand how R2R is used, prioritize new features and bug fixes, and improve overall performance and stability. 
- -## Disabling Telemetry - -To opt out of telemetry, you can set an environment variable: - -```bash -export TELEMETRY_ENABLED=false -``` - - -Valid values to disable telemetry are `false`, `0`, or `f`. When telemetry is disabled, no events will be captured. - - -## Collected Information - -Our telemetry system collects basic, anonymous information such as: - -- **Feature Usage**: Which features are being used and their frequency of use. - -## Data Storage - - - - -We use [Posthog](https://posthog.com/) to store and analyze telemetry data. Posthog is an open-source platform for product analytics. - -For more information about Posthog: -- Visit their website: [posthog.com](https://posthog.com/) -- Check out their GitHub repository: [github.com/posthog](https://github.com/posthog) - - - - -## Why We Collect Telemetry - -Telemetry data helps us: - -1. Understand which features are most valuable to users -2. Identify areas for improvement -3. Prioritize development efforts -4. Enhance R2R's overall performance and stability - -We appreciate your participation in our telemetry program, as it directly contributes to making R2R better for everyone. - - -We respect your privacy. All collected data is anonymous and used solely for improving R2R. - diff --git a/docs/documentation/deep-dive/providers/auth.mdx b/docs/documentation/deep-dive/providers/auth.mdx deleted file mode 100644 index c988b40e1..000000000 --- a/docs/documentation/deep-dive/providers/auth.mdx +++ /dev/null @@ -1,350 +0,0 @@ ---- -title: 'Crypto & Auth' -description: 'Learn how to configure and use the authentication provider in R2R' ---- - -## Introduction - -R2R's `CryptoProvider` and `AuthProvider` combine to handle user authentication and cryptographic operations in your applications. This guide offers an in-depth look at the system's architecture, configuration options, and best practices for implementation. - -For a practical, step-by-step guide on implementing authentication in R2R, including code examples and common use cases, see our [User Auth Cookbook](/cookbooks/user-auth). - - -When authentication is not required (require_authentication is set to false, which is the default in `r2r.toml`), unauthenticated requests will default to using the credentials of the default admin user. - -This behavior ensures that operations can proceed smoothly in development or testing environments where authentication may not be enforced, but it should be used with caution in production settings. - - -## Architecture Overview - -R2R's Crypto & Auth system is built on two primary components: - -1. **Authentication Provider**: Handles user registration, login, token management, and related operations. -2. **Cryptography Provider**: Manages password hashing, verification, and generation of secure tokens. - -These providers work in tandem to ensure secure user management and data protection. - -## Providers - -### Authentication Provider - -The default `R2RAuthProvider` offers a complete authentication solution. - -Key features: -- JWT-based access and refresh tokens -- User registration and login -- Email verification (optional) -- Password reset functionality -- Superuser capabilities - -### Cryptography Provider - -The default `BCryptProvider` handles cryptographic operations. 
- -Key features: -- Secure password hashing using bcrypt -- Password verification -- Generation of cryptographically secure verification codes - -## Configuration - -### Authentication Configuration - -```toml -[auth] -provider = "r2r" -access_token_minutes_lifetime = 60 -access_token_days_lifetime = 7 -require_authentication = true -require_email_verification = false -default_admin_email = "admin@example.com" -default_admin_password = "change_me_immediately" -``` - -### Cryptography Configuration - -```toml -[crypto] -provider = "bcrypt" -salt_rounds = 12 -``` - -## Secret Key Management - -R2R uses a secret key for JWT signing. Generate a secure key using: - -```bash -r2r generate-private-key -``` - -Set the key as an environment variable: - -```bash -export R2R_SECRET_KEY=your_generated_key -``` - - -Never commit your secret key to version control. Use environment variables or secure key management solutions in production. - - -## Auth Service Endpoints - -The AuthProvider is responsible for providing functionality to support these core endpoints in R2R: - -1. `register`: User registration -2. `login`: User authentication -3. `refresh_access_token`: Token refresh -4. `logout`: Session termination -5. `user`: Retrieve user data -6. `change_password`: Update user password -7. `request_password_reset`: Initiate password reset -8. `confirm_password_reset`: Complete password reset -9. `verify_email`: Email verification -10. `get_user_profile`: Fetch user profile -11. `update_user`: Modify user profile -12. `delete_user_account`: Account deletion - -## Implementation Guide - -### User Registration - -```python -from r2r import R2RClient, UserCreate - -client = R2RClient("http://localhost:7272") - -result = client.register(user@example.com, secure_password123) -print(f"Registration Result: {result}") -``` - -### User Authentication - -```python -login_result = client.login("user@example.com", "secure_password123") -client.access_token = login_result['results']['access_token']['token'] -client.refresh_token = login_result['results']['refresh_token']['token'] -``` - -### Making Authenticated Requests - -```python -user = client.user() -print(f"Authenticated User Info: {user}") -``` - -### Token Refresh - -```python -refresh_result = client.refresh_access_token() -client.access_token = refresh_result['results']['access_token']['token'] -``` - -### Logout - -```python -logout_result = client.logout() -print(f"Logout Result: {logout_result}") -``` - -## Security Best Practices - -1. **HTTPS**: Always use HTTPS in production. -2. **Authentication Requirement**: Set `require_authentication` to `true` in production. -3. **Email Verification**: Enable `require_email_verification` for enhanced security. -4. **Password Policy**: Implement and enforce strong password policies. -5. **Rate Limiting**: Implement rate limiting on authentication endpoints. -6. **Token Management**: Implement secure token storage and transmission. -7. **Regular Audits**: Conduct regular security audits of your authentication system. - - -## Custom Authentication Flows and External Identity Providers in R2R - -### Custom Authentication Flows - -To implement custom authentication flows in R2R, you can extend the `AuthProvider` abstract base class. This allows you to create tailored authentication methods while maintaining compatibility with the R2R ecosystem. 
- -Here's an example of how to create a custom authentication provider: - -```python -from r2r.base import AuthProvider, AuthConfig -from r2r.abstractions.user import User, UserCreate, Token, TokenData -from typing import Dict - -class CustomAuthProvider(AuthProvider): - def __init__(self, config: AuthConfig): - super().__init__(config) - # Initialize any custom attributes or connections here - - def create_access_token(self, data: dict) -> str: - # Implement custom access token creation logic - pass - - def create_refresh_token(self, data: dict) -> str: - # Implement custom refresh token creation logic - pass - - def decode_token(self, token: str) -> TokenData: - # Implement custom token decoding logic - pass - - def user(self, token: str) -> User: - # Implement custom user info retrieval logic - pass - - def get_current_active_user(self, current_user: User) -> User: - # Implement custom active user validation logic - pass - - def register(self, user: UserCreate) -> Dict[str, str]: - # Implement custom user registration logic - pass - - def verify_email(self, verification_code: str) -> Dict[str, str]: - # Implement custom email verification logic - pass - - def login(self, email: str, password: str) -> Dict[str, Token]: - # Implement custom login logic - pass - - def refresh_access_token(self, user_email: str, refresh_access_token: str) -> Dict[str, str]: - # Implement custom token refresh logic - pass - - async def auth_wrapper(self, auth: Optional[HTTPAuthorizationCredentials] = Security(security)) -> User: - # You can override this method if you need custom authentication wrapper logic - return await super().auth_wrapper(auth) - - # Add any additional custom methods as needed - async def custom_auth_method(self, ...): - # Implement custom authentication logic - pass -``` - -### Integrating External Identity Providers - -To integrate external identity providers (e.g., OAuth, SAML) with R2R, you can create a custom `AuthProvider` that interfaces with these external services. Here's an outline of how you might approach this: - -1. 
Create a new class that extends `AuthProvider`: - -```python -from r2r.base import AuthProvider, AuthConfig -from r2r.abstractions.user import User, UserCreate, Token, TokenData -import some_oauth_library # Replace with actual OAuth library - -class OAuthAuthProvider(AuthProvider): - def __init__(self, config: AuthConfig): - super().__init__(config) - self.oauth_client = some_oauth_library.Client( - client_id=config.oauth_client_id, - client_secret=config.oauth_client_secret - ) - - async def login(self, email: str, password: str) -> Dict[str, Token]: - # Instead of password-based login, initiate OAuth flow - auth_url = self.oauth_client.get_authorization_url() - # Return auth_url or handle redirect as appropriate for your app - pass - - async def oauth_callback(self, code: str) -> Dict[str, Token]: - # Handle OAuth callback - token = await self.oauth_client.get_access_token(code) - user_data = await self.oauth_client.get_user_info(token) - - # Map external user data to R2R's user model - r2r_user = self._map_oauth_user_to_r2r_user(user_data) - - # Create and return R2R tokens - access_token = self.create_access_token({"sub": r2r_user.email}) - refresh_token = self.create_refresh_token({"sub": r2r_user.email}) - return { - "access_token": Token(token=access_token, token_type="access"), - "refresh_token": Token(token=refresh_token, token_type="refresh"), - } - - def _map_oauth_user_to_r2r_user(self, oauth_user_data: dict) -> User: - # Map OAuth user data to R2R User model - return User( - email=oauth_user_data["email"], - # ... map other fields as needed - ) - - # Implement other required methods... -``` - -2. Update your R2R configuration to use the new provider: - -```python -from r2r import R2R -from r2r.base import AuthConfig -from .custom_auth import OAuthAuthProvider - -auth_config = AuthConfig( - provider="custom_oauth", - oauth_client_id="your_client_id", - oauth_client_secret="your_client_secret", - # ... other config options -) - -r2r = R2R( - auth_provider=OAuthAuthProvider(auth_config), - # ... other R2R configuration -) -``` - -3. Implement necessary routes in your application to handle OAuth flow: - -```python -from fastapi import APIRouter, Depends -from r2r import R2R - -router = APIRouter() - -@router.get("/login") -async def login(): - return await r2r.auth_provider.login(None, None) # Initiate OAuth flow - -@router.get("/oauth_callback") -async def oauth_callback(code: str): - return await r2r.auth_provider.oauth_callback(code) -``` - -Remember to handle error cases, token storage, and user session management according to your application's needs and the specifics of the external identity provider you're integrating with. - -This approach allows you to leverage R2R's authentication abstractions while integrating with external identity providers, giving you flexibility in how you manage user authentication in your application. - -### Integrating External Identity Providers - -To integrate with external identity providers (e.g., OAuth, SAML): - -1. Implement a custom `AuthProvider`. -2. Handle token exchange and user profile retrieval. -3. Map external user data to R2R's user model. - -### Scaling Authentication - -For high-traffic applications: - -1. Implement token caching (e.g., Redis). -2. Consider microservices architecture for auth services. -3. Use database replication for read-heavy operations. - -## Troubleshooting - -Common issues and solutions: - -1. **Token Expiration**: Ensure proper token refresh logic. -2. 
**CORS Issues**: Configure CORS settings for cross-origin requests. -3. **Password Reset Failures**: Check email configuration and token expiration settings. - -## Performance Considerations - -1. **BCrypt Rounds**: Balance security and performance when setting `salt_rounds`. -2. **Database Indexing**: Ensure proper indexing on frequently queried user fields. -3. **Caching**: Implement caching for frequently accessed user data. - -## Conclusion - -R2R's Crypto & Auth system provides a solid foundation for building secure, scalable applications. By understanding its components, following best practices, and leveraging its flexibility, you can create robust authentication systems tailored to your specific needs. - -For further customization and advanced use cases, refer to the [R2R API Documentation](/api-reference) and [configuration guide](/documentation/deep-dive/main/config). diff --git a/docs/documentation/deep-dive/providers/database.mdx b/docs/documentation/deep-dive/providers/database.mdx deleted file mode 100644 index 0e7857244..000000000 --- a/docs/documentation/deep-dive/providers/database.mdx +++ /dev/null @@ -1,183 +0,0 @@ ---- -title: 'Database' -description: 'Learn how to configure and use the database provider in R2R' ---- -## Introduction - -R2R's `DatabaseProvider` offers a unified interface for both relational and vector database operations. R2R only provides database support through Postgres with the pgvector extension. - -Postgres was selected to power R2R because it is free, open source, and widely considered be a stable state of the art database. Further, the Postgres community has implemented pgvector for efficient storage and retrieval of vector embeddings alongside traditional relational data. - -## Architecture Overview - -The Database Provider in R2R is built on two primary components: - -1. **Vector Database Provider**: Handles vector-based operations such as similarity search and hybrid search. -2. **Relational Database Provider**: Manages traditional relational database operations, including user management and document metadata storage. - -These providers work in tandem to ensure efficient data management and retrieval. - -## Configuration - -Update the `database` section in your `r2r.toml` file: - -```toml -[database] -provider = "postgres" -user = "your_postgres_user" -password = "your_postgres_password" -host = "your_postgres_host" -port = "your_postgres_port" -db_name = "your_database_name" -your_project_name = "your_project_name" -``` - -Alternatively, you can set these values using environment variables: - -```bash -export R2R_POSTGRES_USER=your_postgres_user -export R2R_POSTGRES_PASSWORD=your_postgres_password -export R2R_POSTGRES_HOST=your_postgres_host -export R2R_POSTGRES_PORT=your_postgres_port -export R2R_POSTGRES_DBNAME=your_database_name -export R2R_PROJECT_NAME=your_project_name -``` -Environment variables take precedence over the config settings in case of conflicts. The R2R Docker includes configuration options that facilitate integration with a combined Postgres+pgvector database setup. - -## Vector Database Operations - -### Initialization - -The vector database is automatically initialized with dimensions that correspond to your selected embedding model when the Database Provider is first created. 
- -### Upsert Vector Entries - -```python -from r2r import R2R, VectorEntry - -app = R2R() - -vector_entry = VectorEntry(id="unique_id", vector=[0.1, 0.2, 0.3], metadata={"key": "value"}) -app.providers.database.vector.upsert(vector_entry) -``` - -### Search - -```python -results = app.providers.database.vector.search(query_vector=[0.1, 0.2, 0.3], limit=10) -``` - -### Hybrid Search - -```python -results = app.providers.database.vector.hybrid_search( - query_text="search query", - query_vector=[0.1, 0.2, 0.3], - limit=10, - full_text_weight=1.0, - semantic_weight=1.0 -) -``` - -### Delete by Metadata - -```python -deleted_ids = app.providers.database.vector.delete_by_metadata( - metadata_fields=["key1", "key2"], - metadata_values=["value1", "value2"] -) -``` - -## Relational Database Operations - -### User Management - -#### Create User - -```python -from r2r import UserCreate - -new_user = UserCreate(email="user@example.com", password="secure_password") -created_user = app.providers.database.relational.create_user(new_user) -``` - -#### Get User by Email - -```python -user = app.providers.database.relational.get_user_by_email("user@example.com") -``` - -#### Update User - -```python -user.name = "New Name" -updated_user = app.providers.database.relational.update_user(user) -``` - -### Document Management - -#### Upsert Document Overview - -```python -from r2r import DocumentInfo - -doc_info = DocumentInfo( - document_id="doc_id", - title="Document Title", - user_id="user_id", - version="1.0", - size_in_bytes=1024, - metadata={"key": "value"}, - status="processed" -) -app.providers.database.relational.upsert_documents_overview([doc_info]) -``` - -#### Get Documents Overview - -```python -docs = app.providers.database.relational.get_documents_overview( - filter_document_ids=["doc_id1", "doc_id2"], - filter_user_ids=["user_id1", "user_id2"] -) -``` - -## Advanced Features - -### Hybrid Search Function - -The Database Provider includes a custom Postgres function for hybrid search, combining full-text and vector similarity search. - -### Token Blacklisting - -For enhanced security, the provider supports token blacklisting: - -```python -app.providers.database.relational.blacklist_token("token_to_blacklist") -is_blacklisted = app.providers.database.relational.is_token_blacklisted("token_to_check") -``` - -## Security Best Practices - -1. **Environment Variables**: Use environment variables for sensitive information like database credentials. -2. **Connection Pooling**: The provider uses connection pooling for efficient database connections. -3. **Prepared Statements**: SQL queries use prepared statements to prevent SQL injection. -4. **Password Hashing**: User passwords are hashed before storage using the configured Crypto Provider. - -## Performance Considerations - -1. **Indexing**: The vector database automatically creates appropriate indexes for efficient similarity search. -2. **Batch Operations**: Use batch operations like `upsert_entries` for better performance when dealing with multiple records. -3. **Connection Reuse**: The provider reuses database connections to minimize overhead. - -## Troubleshooting - -Common issues and solutions: - -1. **Connection Errors**: Ensure your database credentials and connection details are correct. -2. **Dimension Mismatch**: Verify that the vector dimension in your configuration matches your embeddings. -3. **Performance Issues**: Consider optimizing your queries or adding appropriate indexes. 
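
If you run into performance issues with many single-row writes, the batch path mentioned under Performance Considerations is usually the first thing to try. Below is a minimal sketch of a batched upsert; it assumes `upsert_entries` accepts a list of `VectorEntry` objects (the method name comes from the notes above, but the exact signature is illustrative).

```python
# Minimal sketch of a batched vector upsert. Assumes `upsert_entries`
# takes a list of VectorEntry objects; the exact signature may differ.
from r2r import R2R, VectorEntry

app = R2R()

entries = [
    VectorEntry(
        id=f"chunk_{i}",
        vector=[0.1, 0.2, 0.3],  # must match the configured embedding dimension
        metadata={"source": "example.txt", "chunk_order": i},
    )
    for i in range(100)
]

# A single batched call avoids the per-call overhead of repeated `upsert`
app.providers.database.vector.upsert_entries(entries)
```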
- -## Conclusion - -R2R's Database Provider offers a powerful and flexible solution for managing both vector and relational data. By leveraging Postgres with pgvector, it provides efficient storage and retrieval of embeddings alongside traditional relational data, enabling advanced search capabilities and robust user management. diff --git a/docs/documentation/deep-dive/providers/embeddings.mdx b/docs/documentation/deep-dive/providers/embeddings.mdx deleted file mode 100644 index aef2d91d2..000000000 --- a/docs/documentation/deep-dive/providers/embeddings.mdx +++ /dev/null @@ -1,557 +0,0 @@ -# Embeddings in R2R - -## Introduction - -R2R supports multiple Embedding providers, offering flexibility in choosing and switching between different models based on your specific requirements. This guide provides an in-depth look at configuring and using various Embedding providers within the R2R framework. - -For a quick start on basic configuration, including embedding setup, please refer to our [configuration guide](/documentation/configuration). - -## Providers - -R2R currently supports the following cloud embedding providers: -- OpenAI -- Azure -- Cohere -- HuggingFace -- Bedrock (Amazon) -- Vertex AI (Google) -- Voyage AI - -And for local inference: -- Ollama -- SentenceTransformers - -## Configuration - -Update the `embedding` section in your `r2r.toml` file to configure your embedding provider. Here are some example configurations: - - - - -The default R2R configuration uses LiteLLM to communicate with OpenAI: - -```toml -[embedding] -provider = "litellm" -base_model = "text-embedding-3-small" -base_dimension = 512 -batch_size = 128 -add_title_as_prefix = false -rerank_model = "None" -concurrent_request_limit = 256 - -[embedding.text_splitter] -type = "recursive_character" -chunk_size = 512 -chunk_overlap = 20 -``` - - - -Here is how to configure R2R to communicate with OpenAI via their client: - -```toml -[embedding] -provider = "openai" -base_model = "text-embedding-3-small" -base_dimension = 512 -batch_size = 128 -add_title_as_prefix = false -rerank_model = "None" -concurrent_request_limit = 256 - -[embedding.text_splitter] -type = "recursive_character" -chunk_size = 512 -chunk_overlap = 20 -``` - - - - -For local embedding generation using Ollama: - -```toml -[embedding] -provider = "ollama" -base_model = "mxbai-embed-large" -base_dimension = 1024 -batch_size = 32 -concurrent_request_limit = 32 - -[embedding.text_splitter] -type = "recursive_character" -chunk_size = 512 -chunk_overlap = 20 -``` - - - - - -## Selecting Different Embedding Providers - -R2R supports a wide range of embedding providers through LiteLLM. 
Here's how to configure and use them: - - - - ```python - export OPENAI_API_KEY=your_openai_key - # Update r2r.toml: - # "provider": "litellm" | "openai" - # "base_model": "text-embedding-3-small" - # "base_dimension": 512 - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - text-embedding-3-small - - text-embedding-3-small - - text-embedding-ada-002 - - - - ```python - export AZURE_API_KEY=your_azure_api_key - export AZURE_API_BASE=your_azure_api_base - export AZURE_API_VERSION=your_azure_api_version - # Update r2r.toml: - # "provider": "litellm" - # "base_model": "azure/" - # "base_dimension": XXX - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - text-embedding-ada-002 - - - - ```python - export COHERE_API_KEY=your_cohere_api_key - # Update r2r.toml: - # "provider": "litellm" - # "base_model": "embed-english-v3.0" - # "base_dimension": 1024 - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - embed-english-v3.0 - - embed-english-light-v3.0 - - embed-multilingual-v3.0 - - embed-multilingual-light-v3.0 - - - - ```toml r2r.toml - [embedding] - provider = "ollama" - base_model = "mxbai-embed-large" - base_dimension = 1024 - batch_size = 32 - concurrent_request_limit = 32 - ``` - Deploy your R2R server with: - ``` - r2r serve --config-path=r2r.toml - ``` - - - - ```python - export HUGGINGFACE_API_KEY=your_huggingface_api_key - # Update r2r.toml: - # "provider": "litellm" - # "base_model": "huggingface/microsoft/codebert-base" - # "base_dimension": 768 - r2r serve --config-path=r2r.toml - ``` - LiteLLM supports all Feature-Extraction Embedding models on HuggingFace. - - - - ```python - export AWS_ACCESS_KEY_ID=your_access_key - export AWS_SECRET_ACCESS_KEY=your_secret_key - export AWS_REGION_NAME=your_region_name - # Update r2r.toml: - # "provider": "litellm" - # "base_model": "amazon.titan-embed-text-v1" - # "base_dimension": 1024 - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - amazon.titan-embed-text-v1 - - cohere.embed-english-v3 - - cohere.embed-multilingual-v3 - - - - ```python - export GOOGLE_APPLICATION_CREDENTIALS=path/to/your/credentials.json - export VERTEX_PROJECT=your_project_id - export VERTEX_LOCATION=your_project_location - # Update r2r.toml: - # "provider": "litellm" - # "base_model": "vertex_ai/textembedding-gecko" - # "base_dimension": 768 - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - textembedding-gecko - - textembedding-gecko-multilingual - - - - ```python - export VOYAGE_API_KEY=your_voyage_api_key - # Update r2r.toml: - # "provider": "litellm" - # "base_model": "voyage/voyage-01" - # "base_dimension": 1024 - r2r serve --config-path=r2r.toml - ``` - Supported models include: - - voyage-01 - - voyage-lite-01 - - voyage-lite-01-instruct - - - -## Embedding Service Endpoints - -The EmbeddingProvider is responsible for core functionalities in these R2R endpoints: - -1. `update_files`: When updating existing files in the system -2. `ingest_files`: During the ingestion of new files -3. `search`: For embedding search queries -4. 
`rag`: As part of the Retrieval-Augmented Generation process - -Here's how you can use these endpoints with embeddings: - -### File Ingestion - -```python -from r2r import R2R - -app = R2R() - -# Ingest a file, which will use the configured embedding model -response = app.ingest_files(["path/to/your/file.txt"]) -print(f"Ingestion response: {response}") -``` - -### Search - -```python -# Perform a search, which will embed the query using the configured model -search_results = app.search("Your search query here") -print(f"Search results: {search_results}") -``` - -### RAG (Retrieval-Augmented Generation) - -```python -# Use RAG, which involves embedding for retrieval -rag_response = app.rag("Your question or prompt here") -print(f"RAG response: {rag_response}") -``` - -### Updating Files - -```python -# Update existing files, which may involve re-embedding -update_response = app.update_files(["path/to/updated/file.txt"]) -print(f"Update response: {update_response}") -``` - - -Remember that you don't directly call the embedding methods in your application code. R2R handles the embedding process internally based on your configuration. - - -## Security Best Practices - -1. **API Key Management**: Use environment variables or secure key management solutions for API keys. -2. **Input Validation**: Sanitize and validate all inputs before generating embeddings. -3. **Rate Limiting**: Implement rate limiting to prevent abuse of embedding endpoints. -4. **Monitoring**: Regularly monitor embedding usage for anomalies or misuse. - - -## Custom Embedding Providers in R2R - -You can create custom embedding providers by inheriting from the `EmbeddingProvider` class and implementing the required methods. This allows you to integrate any embedding model or service into R2R. - -### Embedding Provider Structure - -The Embedding system in R2R is built on two main components: - -1. `EmbeddingConfig`: A configuration class for Embedding providers. -2. `EmbeddingProvider`: An abstract base class that defines the interface for all Embedding providers. 
- -### EmbeddingConfig - -The `EmbeddingConfig` class is used to configure Embedding providers: - -```python -from r2r.base import ProviderConfig -from typing import Optional - -class EmbeddingConfig(ProviderConfig): - provider: Optional[str] = None - base_model: Optional[str] = None - base_dimension: Optional[int] = None - rerank_model: Optional[str] = None - rerank_dimension: Optional[int] = None - rerank_transformer_type: Optional[str] = None - batch_size: int = 1 - prefixes: Optional[dict[str, str]] = None - - def validate(self) -> None: - if self.provider not in self.supported_providers: - raise ValueError(f"Provider '{self.provider}' is not supported.") - - @property - def supported_providers(self) -> list[str]: - return [None, "openai", "ollama", "sentence-transformers"] -``` - -### EmbeddingProvider - -The `EmbeddingProvider` is an abstract base class that defines the common interface for all Embedding providers: - -```python -from abc import abstractmethod -from enum import Enum -from r2r.base import Provider -from r2r.abstractions.embedding import EmbeddingPurpose -from r2r.abstractions.search import VectorSearchResult - -class EmbeddingProvider(Provider): - class PipeStage(Enum): - BASE = 1 - RERANK = 2 - - def __init__(self, config: EmbeddingConfig): - if not isinstance(config, EmbeddingConfig): - raise ValueError("EmbeddingProvider must be initialized with a `EmbeddingConfig`.") - super().__init__(config) - - @abstractmethod - def get_embedding( - self, - text: str, - stage: PipeStage = PipeStage.BASE, - purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, - ): - pass - - @abstractmethod - def get_embeddings( - self, - texts: list[str], - stage: PipeStage = PipeStage.BASE, - purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, - ): - pass - - @abstractmethod - def rerank( - self, - query: str, - results: list[VectorSearchResult], - stage: PipeStage = PipeStage.RERANK, - limit: int = 10, - ): - pass - - @abstractmethod - def tokenize_string( - self, text: str, model: str, stage: PipeStage - ) -> list[int]: - pass - - def set_prefixes(self, config_prefixes: dict[str, str], base_model: str): - # Implementation of prefix setting - pass -``` - -### Creating a Custom Embedding Provider - -To create a custom Embedding provider, follow these steps: - -1. Create a new class that inherits from `EmbeddingProvider`. -2. Implement the required methods: `get_embedding`, `get_embeddings`, `rerank`, and `tokenize_string`. -3. (Optional) Implement async versions of methods if needed. -4. (Optional) Add any additional methods or attributes specific to your provider. 
- -Here's an example of a custom Embedding provider: - -```python -import numpy as np -from r2r.base import EmbeddingProvider, EmbeddingConfig -from r2r.abstractions.embedding import EmbeddingPurpose -from r2r.abstractions.search import VectorSearchResult - -class CustomEmbeddingProvider(EmbeddingProvider): - def __init__(self, config: EmbeddingConfig): - super().__init__(config) - # Initialize any custom attributes or models here - self.model = self._load_custom_model(config.base_model) - - def _load_custom_model(self, model_name): - # Load your custom embedding model here - pass - - def get_embedding( - self, - text: str, - stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, - purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, - ) -> list[float]: - # Apply prefix if available - if purpose in self.prefixes: - text = f"{self.prefixes[purpose]}{text}" - - # Generate embedding using your custom model - embedding = self.model.encode(text) - return embedding.tolist() - - def get_embeddings( - self, - texts: list[str], - stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, - purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, - ) -> list[list[float]]: - # Apply prefixes if available - if purpose in self.prefixes: - texts = [f"{self.prefixes[purpose]}{text}" for text in texts] - - # Generate embeddings in batches - all_embeddings = [] - for i in range(0, len(texts), self.config.batch_size): - batch = texts[i:i+self.config.batch_size] - batch_embeddings = self.model.encode(batch) - all_embeddings.extend(batch_embeddings.tolist()) - return all_embeddings - - def rerank( - self, - query: str, - results: list[VectorSearchResult], - stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, - limit: int = 10, - ) -> list[VectorSearchResult]: - if not self.config.rerank_model: - return results[:limit] - - # Implement custom reranking logic here - # This is a simple example using dot product similarity - query_embedding = self.get_embedding(query, stage, EmbeddingPurpose.QUERY) - for result in results: - result.score = np.dot(query_embedding, result.embedding) - - reranked_results = sorted(results, key=lambda x: x.score, reverse=True) - return reranked_results[:limit] - - def tokenize_string( - self, text: str, model: str, stage: EmbeddingProvider.PipeStage - ) -> list[int]: - # Implement custom tokenization logic - # This is a simple example using basic string splitting - return [ord(char) for word in text.split() for char in word] - - # Optionally implement async versions of methods - async def async_get_embedding(self, text: str, stage: EmbeddingProvider.PipeStage, purpose: EmbeddingPurpose): - # Implement async version if needed - return self.get_embedding(text, stage, purpose) - - async def async_get_embeddings(self, texts: list[str], stage: EmbeddingProvider.PipeStage, purpose: EmbeddingPurpose): - # Implement async version if needed - return self.get_embeddings(texts, stage, purpose) -``` - -### Registering and Using the Custom Provider - -To use your custom Embedding provider in R2R: - -1. Update the `EmbeddingConfig` class to include your custom provider: - -```python -class EmbeddingConfig(ProviderConfig): - # ...existing code... - - @property - def supported_providers(self) -> list[str]: - return [None, "openai", "ollama", "sentence-transformers", "custom"] # Add your custom provider here -``` - -2. 
Update your R2R configuration to use the custom provider: - -```toml -[embedding] -provider = "custom" -base_model = "your-custom-model" -base_dimension = 768 -batch_size = 32 - -[embedding.prefixes] -index = "Represent this document for retrieval: " -query = "Represent this query for retrieving relevant documents: " -``` - -3. In your R2R application, register the custom provider: - -```python -from r2r import R2R -from r2r.base import EmbeddingConfig -from your_module import CustomEmbeddingProvider - -def get_embedding_provider(config: EmbeddingConfig): - if config.provider == "custom": - return CustomEmbeddingProvider(config) - # ... handle other providers ... - -r2r = R2R(embedding_provider_factory=get_embedding_provider) -``` - -Now you can use your custom Embedding provider seamlessly within your R2R application: - -```python -# Ingest documents (embeddings will be generated using your custom provider) -r2r.ingest_files(["path/to/document.txt"]) - -# Perform a search -results = r2r.search("Your search query") - -# Use RAG -rag_response = r2r.rag("Your question here") -``` - -By following this structure, you can integrate any embedding model or service into R2R, maintaining consistency with the existing system while adding custom functionality as needed. This approach allows for great flexibility in choosing or implementing embedding solutions that best fit your specific use case.### Embedding Prefixes - -## Embedding Prefixes - -R2R supports embedding prefixes to enhance embedding quality for different purposes: - -1. **Index Prefixes**: Applied to documents during indexing. -2. **Query Prefixes**: Applied to search queries. - -Configure prefixes in your `r2r.toml` or when initializing the EmbeddingConfig. - -## Troubleshooting - -Common issues and solutions: - -1. **API Key Errors**: Ensure your API keys are correctly set and have the necessary permissions. -2. **Dimension Mismatch**: Verify that the `base_dimension` in your config matches the actual output of the chosen model. -3. **Out of Memory Errors**: Adjust the batch size or choose a smaller model if encountering memory issues with local models. - -## Performance Considerations - -1. **Batching**: Use batching for multiple, similar requests to improve throughput. -2. **Model Selection**: Balance between model capability and inference speed based on your use case. -3. **Caching**: Implement caching strategies to avoid re-embedding identical text. - -## Conclusion - -R2R's Embedding system provides a flexible and powerful foundation for integrating various embedding models into your applications. By understanding the available providers, configuration options, and best practices, you can effectively leverage embeddings to enhance your R2R-based projects. - -For an advanced example of implementing reranking in R2R. diff --git a/docs/documentation/deep-dive/providers/knowledge-graph.mdx b/docs/documentation/deep-dive/providers/knowledge-graph.mdx deleted file mode 100644 index 83448e73b..000000000 --- a/docs/documentation/deep-dive/providers/knowledge-graph.mdx +++ /dev/null @@ -1,149 +0,0 @@ ---- -title: 'Knowledge Graph' -description: 'Learn how to configure and use the knowledge graph provider in R2R' ---- - -## Introduction - -R2R's `KGProvider` handles the creation, management, and querying of knowledge graphs in your applications. This guide offers an in-depth look at the system's architecture, configuration options, and best practices for implementation. 
- -For a practical, step-by-step guide on implementing knowledge graphs in R2R, including code examples and common use cases, see our [GraphRAG Cookbook](/cookbooks/graphrag). - - -## Configuration - -### Knowledge Graph Configuration - -These are located in the `r2r.toml` file, under the `[database]` section. - -```toml -[database] -provider = "postgres" -batch_size = 256 - - [database.kg_creation_settings] - kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" - entity_types = ["Person", "Organization", "Location"] # if empty, all entities are extracted - relation_types = ["works at", "founded by", "invested in"] # if empty, all relations are extracted - max_knowledge_triples = 100 - fragment_merge_count = 4 # number of fragments to merge into a single extraction - generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for triplet extraction - - [database.kg_enrichment_settings] - max_description_input_length = 65536 # increase if you want more comprehensive descriptions - max_summary_input_length = 65536 # increase if you want more comprehensive summaries - generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering - leiden_params = {} -``` - - -Environment variables take precedence over the config settings in case of conflicts. The R2R Docker includes configuration options that facilitate integration with a combined Postgres+pgvector database setup. - - -## Implementation Guide - -### File Ingestion and Graph Construction - -```python -from r2r import R2RClient - -client = R2RClient("http://localhost:7272") - -result = client.ingest_files(["path/to/your/file.txt"]) - -# following will create a graph on all ingested files -document_ids = [] # add document ids that you want to create a graph on -creation_result = client.create_graph(document_ids) -print(f"Creation Result: {creation_result}") -# wait for the creation to complete - -enrichment_result = client.enrich_graph() # enrichment will run on all nodes in the graph -print(f"Enrichment Result: {enrichment_result}") -# wait for the enrichment to complete -``` - -### Graph-based Search - -There are two types of graph-based search: `local` and `global`. - -- `local` search is faster and more accurate, but it is not as comprehensive as `global` search. -- `global` search is slower and more comprehensive, but it will give you the most relevant results. Note that global search may perform a large number of LLM calls. - -```python -search_result = client.search( - query="Find founders who worked at Google", - kg_search_settings={"use_kg_search":True, "kg_search_type": "local"} -) -print(f"Search Result: {search_result}") -``` - -### Retrieval-Augmented Generation - -```python -rag_result = client.rag( - query="Summarize the achievements of founders who worked at Google", - kg_search_settings={"use_kg_search":True, "kg_search_type": "local"} -) -print(f"RAG Result: {rag_result}") -``` - -## Best Practices - -1. **Optimize Chunk Size**: Adjust the `chunk_size` based on your data and model capabilities. -2. **Use Domain-Specific Entity Types and Relations**: Customize these for more accurate graph construction. -3. **Balance Batch Size**: Adjust `batch_size` for optimal performance and resource usage. -4. **Implement Caching**: Cache frequently accessed graph data for improved performance. -5. **Regular Graph Maintenance**: Periodically clean and optimize your knowledge graph. 
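
To complement the `local` search example above, here is a sketch of the `global` variant. As noted earlier, global search is more comprehensive but slower and may issue a large number of LLM calls, so reserve it for queries that genuinely need corpus-wide reasoning. The settings keys mirror the local example; only the illustrative query is new.

```python
# Sketch of a `global` graph search, mirroring the `local` example above.
# Expect higher latency and many more LLM calls than kg_search_type="local".
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

global_result = client.search(
    query="Summarize the relationships between founders and the companies they created",
    kg_search_settings={"use_kg_search": True, "kg_search_type": "global"},
)
print(f"Global Search Result: {global_result}")
```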
- -## Advanced Topics - -### Custom Knowledge Graph Providers - -Extend the `KGProvider` class to implement custom knowledge graph providers: - -```python -from r2r.base import KGProvider, KGConfig - -class CustomKGProvider(KGProvider): - def __init__(self, config: KGConfig): - super().__init__(config) - # Custom initialization... - - def ingest_files(self, file_paths: List[str]): - # Custom implementation... - - def search(self, query: str, use_kg_search: bool = True): - # Custom implementation... - - # Implement other required methods... -``` - -### Integrating External Graph Databases - -To integrate with external graph databases: - -1. Implement a custom `KGProvider`. -2. Handle data synchronization between R2R and the external database. -3. Implement custom querying methods to leverage the external database's features. - -### Scaling Knowledge Graphs - -For large-scale applications: - -1. Implement graph partitioning for distributed storage and processing. -2. Use graph-specific indexing techniques for faster querying. -3. Consider using a graph computing framework for complex analytics. - -## Troubleshooting - -Common issues and solutions: - -1. **Ingestion Errors**: Check file formats and encoding. -2. **Query Performance**: Optimize graph structure and use appropriate indexes. -3. **Memory Issues**: Adjust batch sizes and implement pagination for large graphs. - -## Conclusion - -R2R's Knowledge Graph system provides a powerful foundation for building applications that require structured data representation and complex querying capabilities. By understanding its components, following best practices, and leveraging its flexibility, you can create sophisticated information retrieval and analysis systems tailored to your specific needs. - -For further customization and advanced use cases, refer to the [R2R API Documentation](/api-reference) and the [GraphRAG Cookbook](/cookbooks/graphrag). diff --git a/docs/documentation/deep-dive/providers/llms.mdx b/docs/documentation/deep-dive/providers/llms.mdx deleted file mode 100644 index fbb9be6f4..000000000 --- a/docs/documentation/deep-dive/providers/llms.mdx +++ /dev/null @@ -1,342 +0,0 @@ ---- -title: 'Language Models (LLMs)' -description: 'Configure and use multiple Language Model providers in R2R' ---- - -## Introduction - -R2R's `LLMProvider` supports multiple third-party Language Model (LLM) providers, offering flexibility in choosing and switching between different models based on your specific requirements. This guide provides an in-depth look at configuring and using various LLM providers within the R2R framework. - -## Architecture Overview - -R2R's LLM system is built on a flexible provider model: - -1. **LLM Provider**: An abstract base class that defines the common interface for all LLM providers. -2. **Specific LLM Providers**: Concrete implementations for different LLM services (e.g., OpenAI, LiteLLM). - -These providers work in tandem to ensure flexible and efficient language model integration. - -## Providers - -### LiteLLM Provider (Default) - -The default `LiteLLMProvider` offers a unified interface for multiple LLM services. - -Key features: -- Support for OpenAI, Anthropic, Vertex AI, HuggingFace, Azure OpenAI, Ollama, Together AI, and Openrouter -- Consistent API across different LLM providers -- Easy switching between models - -### OpenAI Provider - -The `OpenAILLM` class provides direct integration with OpenAI's models. 
- -Key features: -- Direct access to OpenAI's API -- Support for the latest OpenAI models -- Fine-grained control over model parameters - -### Local Models - -Support for running models locally using Ollama or other local inference engines, through LiteLLM. - -Key features: -- Privacy-preserving local inference -- Customizable model selection -- Reduced latency for certain use cases - -## Configuration - -### LLM Configuration - -Update the `completions` section in your `r2r.toml` file: -``` -[completions] -provider = "litellm" - -[completions.generation_config] -model = "gpt-4" -temperature = 0.7 -max_tokens = 150 -``` - -The provided `generation_config` is used to establish the default generation parameters for your deployment. These settings can be overridden at runtime, offering flexibility in your application. You can adjust parameters: - -1. At the application level, by modifying the R2R configuration -2. For individual requests, by passing custom parameters to the `rag` or `get_completion` methods -3. Through API calls, by including specific parameters in your request payload - -This allows you to fine-tune the behavior of your language model interactions on a per-use basis while maintaining a consistent baseline configuration. - - - -## Security Best Practices - -1. **API Key Management**: Use environment variables or secure key management solutions for API keys. -2. **Rate Limiting**: Implement rate limiting to prevent abuse of LLM endpoints. -3. **Input Validation**: Sanitize and validate all inputs before passing them to LLMs. -4. **Output Filtering**: Implement content filtering for LLM outputs to prevent inappropriate content. -5. **Monitoring**: Regularly monitor LLM usage and outputs for anomalies or misuse. - -## Custom LLM Providers in R2R - -### LLM Provider Structure - -The LLM system in R2R is built on two main components: - -1. `LLMConfig`: A configuration class for LLM providers. -2. `LLMProvider`: An abstract base class that defines the interface for all LLM providers. 
- -### LLMConfig - -The `LLMConfig` class is used to configure LLM providers: - -```python -from r2r.base import ProviderConfig -from r2r.base.abstractions.llm import GenerationConfig -from typing import Optional - -class LLMConfig(ProviderConfig): - provider: Optional[str] = None - generation_config: Optional[GenerationConfig] = None - - def validate(self) -> None: - if not self.provider: - raise ValueError("Provider must be set.") - if self.provider and self.provider not in self.supported_providers: - raise ValueError(f"Provider '{self.provider}' is not supported.") - - @property - def supported_providers(self) -> list[str]: - return ["litellm", "openai"] -``` - -### LLMProvider - -The `LLMProvider` is an abstract base class that defines the common interface for all LLM providers: - -```python -from abc import abstractmethod -from r2r.base import Provider -from r2r.base.abstractions.llm import GenerationConfig, LLMChatCompletion, LLMChatCompletionChunk - -class LLMProvider(Provider): - def __init__(self, config: LLMConfig) -> None: - if not isinstance(config, LLMConfig): - raise ValueError("LLMProvider must be initialized with a `LLMConfig`.") - super().__init__(config) - - @abstractmethod - def get_completion( - self, - messages: list[dict], - generation_config: GenerationConfig, - **kwargs, - ) -> LLMChatCompletion: - pass - - @abstractmethod - def get_completion_stream( - self, - messages: list[dict], - generation_config: GenerationConfig, - **kwargs, - ) -> LLMChatCompletionChunk: - pass -``` - -### Creating a Custom LLM Provider - -To create a custom LLM provider, follow these steps: - -1. Create a new class that inherits from `LLMProvider`. -2. Implement the required methods: `get_completion` and `get_completion_stream`. -3. (Optional) Add any additional methods or attributes specific to your provider. 
- -Here's an example of a custom LLM provider: - -```python -import logging -from typing import Generator -from r2r.base import LLMProvider, LLMConfig, LLMChatCompletion, LLMChatCompletionChunk -from r2r.base.abstractions.llm import GenerationConfig - -logger = logging.getLogger(__name__) - -class CustomLLMProvider(LLMProvider): - def __init__(self, config: LLMConfig) -> None: - super().__init__(config) - # Initialize any custom attributes or connections here - self.custom_client = self._initialize_custom_client() - - def _initialize_custom_client(self): - # Initialize your custom LLM client here - pass - - def get_completion( - self, - messages: list[dict], - generation_config: GenerationConfig, - **kwargs, - ) -> LLMChatCompletion: - # Implement the logic to get a completion from your custom LLM - response = self.custom_client.generate(messages, **generation_config.dict(), **kwargs) - - # Convert the response to LLMChatCompletion format - return LLMChatCompletion( - id=response.id, - choices=[ - { - "message": { - "role": "assistant", - "content": response.text - }, - "finish_reason": response.finish_reason - } - ], - usage={ - "prompt_tokens": response.usage.prompt_tokens, - "completion_tokens": response.usage.completion_tokens, - "total_tokens": response.usage.total_tokens - } - ) - - def get_completion_stream( - self, - messages: list[dict], - generation_config: GenerationConfig, - **kwargs, - ) -> Generator[LLMChatCompletionChunk, None, None]: - # Implement the logic to get a streaming completion from your custom LLM - stream = self.custom_client.generate_stream(messages, **generation_config.dict(), **kwargs) - - for chunk in stream: - yield LLMChatCompletionChunk( - id=chunk.id, - choices=[ - { - "delta": { - "role": "assistant", - "content": chunk.text - }, - "finish_reason": chunk.finish_reason - } - ] - ) - - # Add any additional methods specific to your custom provider - def custom_method(self, *args, **kwargs): - # Implement custom functionality - pass -``` - -### Registering and Using the Custom Provider - -To use your custom LLM provider in R2R: - -1. Update the `LLMConfig` class to include your custom provider: - -```python -class LLMConfig(ProviderConfig): - # ...existing code... - - @property - def supported_providers(self) -> list[str]: - return ["litellm", "openai", "custom"] # Add your custom provider here -``` - -2. Update your R2R configuration to use the custom provider: - -```toml -[completions] -provider = "custom" - -[completions.generation_config] -model = "your-custom-model" -temperature = 0.7 -max_tokens = 150 -``` - -3. In your R2R application, register the custom provider: - -```python -from r2r import R2R -from r2r.base import LLMConfig -from your_module import CustomLLMProvider - -def get_llm_provider(config: LLMConfig): - if config.provider == "custom": - return CustomLLMProvider(config) - # ... handle other providers ... - -r2r = R2R(llm_provider_factory=get_llm_provider) -``` - -Now you can use your custom LLM provider seamlessly within your R2R application: - -```python -messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "What is the capital of France?"} -] - -response = r2r.get_completion(messages) -print(response.choices[0].message.content) -``` - -By following this structure, you can integrate any LLM or service into R2R, maintaining consistency with the existing system while adding custom functionality as needed. 
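
As described in the Configuration section, the defaults in `[completions.generation_config]` can be overridden for individual requests. Continuing from the example above (reusing `r2r` and `messages`), here is a hedged sketch that passes a custom `GenerationConfig` to `get_completion`; the keyword-argument form is an assumption based on the `LLMProvider` interface shown earlier, and the exact call path may differ in your deployment.

```python
# Sketch of a per-request override of the default generation settings.
# Assumes get_completion accepts a GenerationConfig, as in the LLMProvider
# interface above; the exact signature may differ.
from r2r.base.abstractions.llm import GenerationConfig

creative_config = GenerationConfig(
    model="gpt-4",
    temperature=1.0,   # more exploratory than the 0.7 default in r2r.toml
    max_tokens=300,
)

response = r2r.get_completion(messages, generation_config=creative_config)
print(response.choices[0].message.content)
```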
-## Prompt Engineering - -R2R supports advanced prompt engineering techniques: - -1. **Template Management**: Create and manage reusable prompt templates. -2. **Dynamic Prompts**: Generate prompts dynamically based on context or user input. -3. **Few-shot Learning**: Incorporate examples in your prompts for better results. - -## Troubleshooting - -Common issues and solutions: - -1. **API Key Errors**: Ensure your API keys are correctly set and have the necessary permissions. -2. **Rate Limiting**: Implement exponential backoff for retries on rate limit errors. -3. **Context Length Errors**: Be mindful of the maximum context length for your chosen model. -4. **Model Availability**: Ensure the requested model is available and properly configured. - -## Performance Considerations - -1. **Batching**: Use batching for multiple, similar requests to improve throughput. -2. **Streaming**: Utilize streaming for long-form content generation to improve perceived latency. -3. **Model Selection**: Balance between model capability and inference speed based on your use case. - -## Server Configuration - -The `R2RConfig` class handles the configuration of various components, including LLMs. Here's a simplified version: - -```python -class R2RConfig: - REQUIRED_KEYS: dict[str, list] = { - # ... other keys ... - "completions": ["provider"], - # ... other keys ... - } - - def __init__(self, config_data: dict[str, Any]): - # Load and validate configuration - # ... - - # Set LLM configuration - self.completions = LLMConfig.create(**self.completions) - - # Override GenerationConfig defaults - GenerationConfig.set_default(**self.completions.get("generation_config", {})) - - # ... other initialization ... -``` - -This configuration system allows for flexible setup of LLM providers and their default parameters. - -## Conclusion - -R2R's LLM system provides a flexible and powerful foundation for integrating various language models into your applications. By understanding the available providers, configuration options, and best practices, you can effectively leverage LLMs to enhance your R2R-based projects. - -For further customization and advanced use cases, refer to the [R2R API Documentation](/api-reference) and [configuration guide](/documentation/configuration). diff --git a/docs/documentation/deployment/aws.mdx b/docs/documentation/deployment/aws.mdx deleted file mode 100644 index e62fe0997..000000000 --- a/docs/documentation/deployment/aws.mdx +++ /dev/null @@ -1,164 +0,0 @@ ---- -title: 'AWS' -description: 'Learn how to deploy R2R into AWS' -icon: 'Amazon' ---- -# Deploying R2R on Amazon Web Services (AWS) - -Amazon Web Services (AWS) provides a robust and scalable platform for deploying R2R (RAG to Riches). This guide will walk you through the process of setting up R2R on an Amazon EC2 instance, making it accessible both locally and publicly. - -## Overview - -Deploying R2R on AWS involves the following main steps: - -1. Creating an Amazon EC2 instance -2. Installing necessary dependencies -3. Setting up R2R -4. Configuring port forwarding for local access -5. Exposing ports for public access (optional) - -This guide assumes you have an AWS account and the necessary permissions to create and manage EC2 instances. - -## Creating an Amazon EC2 Instance - -1. Log in to the [AWS Management Console](https://aws.amazon.com/console/). -2. Navigate to EC2 under "Compute" services. -3. Click "Launch Instance". -4. Choose an Amazon Machine Image (AMI): - - Select "Ubuntu Server 22.04 LTS (HVM), SSD Volume Type" -5. 
Choose an Instance Type: - - For a small-mid sized organization (< 5000 users), select t3.xlarge (4 vCPU, 16 GiB Memory) or higher -6. Configure Instance Details: - - Leave default settings or adjust as needed -7. Add Storage: - - Set the root volume to at least 500 GiB -8. Add Tags (optional): - - Add any tags for easier resource management -9. Configure Security Group: - - Create a new security group - - Add rules to allow inbound traffic on ports 22 (SSH) and 7272 (R2R API) -10. Review and Launch: - - Review your settings and click "Launch" - - Choose or create a key pair for SSH access - -## Installing Dependencies - -SSH into your newly created EC2 instance: - -```bash -ssh -i /path/to/your-key.pem ubuntu@your-instance-public-dns -``` - -Now, run the following commands to install the necessary R2R dependencies: - -```bash -# Update package list and install Python and pip -sudo apt update -sudo apt install python3-pip - -# Install R2R -pip install r2r - -# Add R2R to PATH -echo 'export PATH=$PATH:$HOME/.local/bin' >> ~/.bashrc -source ~/.bashrc - -# Install Docker -sudo apt-get update -sudo apt-get install ca-certificates curl gnupg -sudo install -m 0755 -d /etc/apt/keyrings -curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg -sudo chmod a+r /etc/apt/keyrings/docker.gpg - -echo \ - "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ - $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ - sudo tee /etc/apt/sources.list.d/docker.list > /dev/null - -sudo apt-get update -sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin - -# Add your user to the Docker group -sudo usermod -aG docker $USER -newgrp docker - -# Verify Docker installation -docker run hello-world -``` - -## Setting up R2R - -1. Serve your R2R backend: - -```bash -# Set required remote providers -export OPENAI_API_KEY=sk-... - -# Optional - pass in a custom configuration here -r2r serve --docker --full -``` - -2. Double check the health of the system: - -```bash -r2r health -``` - -3. Test ingesting and searching a sample document from a remote environment: - -```bash -# From your local machine -r2r --base-url=http://:7272 ingest-sample-file -sleep 10 -r2r --base-url=http://:7272 search --query='Who was aristotle?' -``` - -Replace `` with your EC2 instance's public IP address. - -## Configuring Port Forwarding for Local Access - -To access R2R from your local machine, use SSH port forwarding: - -```bash -ssh -i /path/to/your-key.pem -L 7273:localhost:7273 -L 7274:localhost:7274 ubuntu@your-instance-public-dns -``` - -## Exposing Ports for Public Access (Optional) - -To make R2R publicly accessible: - -1. In the AWS Management Console, go to EC2 > Security Groups. -2. Select the security group associated with your EC2 instance. -3. Click "Edit inbound rules". -4. Add a new rule: - - Type: Custom TCP - - Port range: 7272 - - Source: Anywhere (0.0.0.0/0) - - Description: Allow R2R API -5. Click "Save rules". - -6. Ensure R2R is configured to listen on all interfaces (0.0.0.0). - -After starting your R2R application, users can access it at: - -``` -http://:7272 -``` - -## Security Considerations - -- Use HTTPS (port 443) with a valid SSL certificate for production. -- Restrict source IP addresses in the security group rule if possible. -- Regularly update and patch your system and applications. -- Use AWS VPC for network isolation. 
-- Enable and configure AWS CloudTrail for auditing. -- Use AWS IAM roles for secure access management. -- Consider using AWS Certificate Manager for SSL/TLS certificates. -- Monitor incoming traffic using AWS CloudWatch. -- Remove or disable the security group rule when not needed for testing. - -## Conclusion - -You have now successfully deployed R2R on Amazon Web Services. The application should be accessible locally through SSH tunneling and optionally publicly through direct access to the EC2 instance. Remember to configure authentication and implement proper security measures before exposing your R2R instance to the public internet. - -For more information on configuring and using R2R, refer to the [configuration documentation](/documentation/configuration/introduction). diff --git a/docs/documentation/deployment/azure.mdx b/docs/documentation/deployment/azure.mdx deleted file mode 100644 index 591404002..000000000 --- a/docs/documentation/deployment/azure.mdx +++ /dev/null @@ -1,180 +0,0 @@ ---- -title: 'Azure' -description: 'Learn how to deploy R2R into Azure' -icon: 'Microsoft' ---- - -# Deploying R2R on Azure - -Azure provides a robust and scalable platform for deploying R2R (RAG to Riches). This guide will walk you through the process of setting up R2R on an Azure Virtual Machine, making it accessible both locally and publicly. - -## Overview - -Deploying R2R on Azure involves the following main steps: - -1. Creating an Azure Virtual Machine -2. Installing necessary dependencies -3. Setting up R2R -4. Configuring port forwarding for local access -5. Exposing ports for public access (optional) - -This guide assumes you have an Azure account and the necessary permissions to create and manage Virtual Machines. - -## Creating an Azure Virtual Machine - -1. Log in to the [Azure Portal](https://portal.azure.com/). -2. Click on "Create a resource" and search for "Virtual Machine". -3. Choose `Ubuntu Server 22.04 LTS - x64 Gen2` as the operating system. -4. Select a VM size with at least 16GB of RAM, 4-8 vCPU cores, and 500GB of disk for a small-mid sized organization (< 5000 users). The `D4s_v3` series is a good starting point. -5. Configure networking settings to allow inbound traffic on ports `22` (SSH), and optionally `7272` (R2R API). -6. Review and create the VM. - - -## Exposing Ports for Public Access (Optional) - -To make R2R publicly accessible: - -1. Log in to the Azure Portal. -2. Navigate to your VM > Networking > Network Security Group. -3. Add a new inbound security rule: - - Destination port ranges: 7272 - - Protocol: TCP - - Action: Allow - - Priority: 1000 (or lower than conflicting rules) - - Name: Allow_7272 - -4. Ensure R2R is configured to listen on all interfaces (0.0.0.0). 
- -After starting your R2R application, users can access it at: - -``` -http://:7272 -``` - - - - - -## Installing Dependencies - -SSH into your newly created VM with a command like `ssh -i my_pem.pem azureuser@`: - - - - - -Now, run the following commands to install the necessary R2R dependencies: - - -```bash -# Update package list and install Python and pip -sudo apt update -sudo apt install python3-pip - -# Install R2R -pip install r2r - -# Add R2R to PATH -echo 'export PATH=$PATH:$HOME/.local/bin' >> ~/.bashrc -source ~/.bashrc - -# Install Docker -sudo apt-get update -sudo apt-get install ca-certificates curl -sudo install -m 0755 -d /etc/apt/keyrings -sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc -sudo chmod a+r /etc/apt/keyrings/docker.asc - -echo \ - "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ - $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ - sudo tee /etc/apt/sources.list.d/docker.list > /dev/null -sudo apt-get update - -sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin - -# Add your user to the Docker group -sudo usermod -aG docker $USER -newgrp docker - -# Verify Docker installation -docker run hello-world -``` - - - - - - -## Setting up R2R - -1. Serve your R2R backend - -```bash -# Set required remote providers -export OPENAI_API_KEY=sk-... - -# Optional - pass in a custom configuration here -r2r serve --docker --full -``` - - - - - - -2. Double check the health of the system - -```bash - r2r health -``` - -```bash -Time taken: 0.01 seconds -{'results': {'response': 'ok'}} -``` - -3. Test ingesting and searching a sample document from a remote environment - -```bash -# From your local machine - -r2r --base-url=http://:7272 ingest-sample-file -sleep 10 -r2r --base-url=http://:7272 search --query='Who was aristotle?' -``` - -```bash - -Time taken: 0.43 seconds -Sample file ingestion completed. Ingest files response: - -[{'message': 'Ingestion task queued successfully.', 'task_id': '887f9f99-cc18-4c1e-8f61-facf1d212334', 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1'}] -Vector search results: -{'fragment_id': 'ecc754cd-380d-585f-84ac-021542ef3c1d', 'extraction_id': '92d78034-8447-5046-bf4d-e019932fbc20', 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', 'collection_ids': [], 'score': 0.7822163571248282, 'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.', 'metadata': {'title': 'aristotle.txt', 'version': 'v0', 'chunk_order': 0, 'document_type': 'txt', 'unstructured_filetype': 'text/plain', 'unstructured_languages': ['eng'], 'partitioned_by_unstructured': True, 'associatedQuery': 'Who was aristotle?'}} -... -``` - -## Configuring Port Forwarding for Local Access - -To access R2R from your local machine, use SSH port forwarding: - -```bash -ssh i my_pem.pem -L 7273:localhost:7273 -L 7274:localhost:7274 azureuser@ -``` - -Replace `` with your Azure VM's public IP address or DNS name. 
Note that in the R2R dashboard you will still need to use the remote VM address as requests are made from the client-side. - -## Security Considerations - -- Use HTTPS (port 443) with a valid SSL certificate for production. -- Restrict source IP addresses in the security rule if possible. -- Regularly update and patch your system and applications. -- Monitor incoming traffic for suspicious activities. -- Remove or disable the rule when not needed for testing. - -## Conclusion - -You have now successfully deployed R2R on Azure. The application should be accessible locally through SSH tunneling and optionally publicly through direct access to the Azure VM. Remember to configure authentication and implement proper security measures before exposing your R2R instance to the public internet. - -For more information on configuring and using R2R, refer to the [configuration documentation](/documentation/configuration/introduction). diff --git a/docs/documentation/deployment/gcp.mdx b/docs/documentation/deployment/gcp.mdx deleted file mode 100644 index e701c8b0b..000000000 --- a/docs/documentation/deployment/gcp.mdx +++ /dev/null @@ -1,162 +0,0 @@ ---- -title: 'GCP' -description: 'Learn how to deploy R2R into Google Cloud' -icon: 'Google' ---- -# Deploying R2R on Google Cloud Platform - -Google Cloud Platform (GCP) offers a robust and scalable environment for deploying R2R (RAG to Riches). This guide will walk you through the process of setting up R2R on a Google Compute Engine instance, making it accessible both locally and publicly. - -## Overview - -Deploying R2R on GCP involves the following main steps: - -1. Creating a Google Compute Engine instance -2. Installing necessary dependencies -3. Setting up R2R -4. Configuring port forwarding for local access -5. Exposing ports for public access (optional) - -This guide assumes you have a Google Cloud account and the necessary permissions to create and manage Compute Engine instances. - -## Creating a Google Compute Engine Instance - -1. Log in to the [Google Cloud Console](https://console.cloud.google.com/). -2. Navigate to "Compute Engine" > "VM instances". -3. Click "Create Instance". -4. Choose the following settings: - - Name: Choose a name for your instance - - Region and Zone: Select based on your location/preferences - - Machine Configuration: - - Series: N1 - - Machine type: n1-standard-4 (4 vCPU, 15 GB memory) or higher - - Boot disk: - - Operating System: Ubuntu - - Version: Ubuntu 22.04 LTS - - Size: 500 GB - - Firewall: Allow HTTP and HTTPS traffic -5. Click "Create" to launch the instance. - -## Installing Dependencies - -SSH into your newly created instance using the Google Cloud Console or gcloud command: - -```bash -gcloud compute ssh --zone "your-zone" "your-instance-name" -``` - -Now, run the following commands to install the necessary R2R dependencies: - -```bash -# Update package list and install Python and pip -sudo apt update -sudo apt install python3-pip - -# Install R2R -pip install r2r - -# Add R2R to PATH -echo 'export PATH=$PATH:$HOME/.local/bin' >> ~/.bashrc -source ~/.bashrc - -# Install Docker -sudo apt-get update -sudo apt-get install ca-certificates curl gnupg -sudo install -m 0755 -d /etc/apt/keyrings -curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo gpg --dearmor -o /etc/apt/keyrings/docker.gpg -sudo chmod a+r /etc/apt/keyrings/docker.gpg - -echo \ - "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.gpg] https://download.docker.com/linux/ubuntu \ - $(. 
/etc/os-release && echo "$VERSION_CODENAME") stable" | \ - sudo tee /etc/apt/sources.list.d/docker.list > /dev/null - -sudo apt-get update -sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin - -# Add your user to the Docker group -sudo usermod -aG docker $USER -newgrp docker - -# Verify Docker installation -docker run hello-world -``` - -## Setting up R2R - -1. Serve your R2R backend: - -```bash -# Set required remote providers -export OPENAI_API_KEY=sk-... - -# Optional - pass in a custom configuration here -r2r serve --docker --full -``` - -2. Double check the health of the system: - -```bash -r2r health -``` - -3. Test ingesting and searching a sample document from a remote environment: - -```bash -# From your local machine -r2r --base-url=http://:7272 ingest-sample-file -sleep 10 -r2r --base-url=http://:7272 search --query='Who was aristotle?' -``` - -Replace `` with your Google Compute Engine instance's external IP address. - -## Configuring Port Forwarding for Local Access - -To access R2R from your local machine, use SSH port forwarding: - -```bash -gcloud compute ssh --zone "your-zone" "your-instance-name" -- -L 7273:localhost:7273 -L 7274:localhost:7274 -``` - -## Exposing Ports for Public Access (Optional) - -To make R2R publicly accessible: - -1. In the Google Cloud Console, go to "VPC network" > "Firewall". -2. Click "Create Firewall Rule". -3. Configure the rule: - - Name: Allow-R2R - - Target tags: r2r-server - - Source IP ranges: 0.0.0.0/0 - - Specified protocols and ports: tcp:7272 -4. Click "Create". - -5. Add the network tag to your instance: - - Go to Compute Engine > VM instances. - - Click on your instance name. - - Click "Edit". - - Under "Network tags", add "r2r-server". - - Click "Save". - -6. Ensure R2R is configured to listen on all interfaces (0.0.0.0). - -After starting your R2R application, users can access it at: - -``` -http://:7272 -``` - -## Security Considerations - -- Use HTTPS (port 443) with a valid SSL certificate for production. -- Restrict source IP addresses in the firewall rule if possible. -- Regularly update and patch your system and applications. -- Monitor incoming traffic for suspicious activities. -- Remove or disable the firewall rule when not needed for testing. - -## Conclusion - -You have now successfully deployed R2R on Google Cloud Platform. The application should be accessible locally through SSH tunneling and optionally publicly through direct access to the Compute Engine instance. Remember to configure authentication and implement proper security measures before exposing your R2R instance to the public internet. - -For more information on configuring and using R2R, refer to the [configuration documentation](/documentation/configuration/introduction). diff --git a/docs/documentation/deployment/introduction.mdx b/docs/documentation/deployment/introduction.mdx deleted file mode 100644 index f737f3364..000000000 --- a/docs/documentation/deployment/introduction.mdx +++ /dev/null @@ -1,34 +0,0 @@ ---- -title: 'Introduction' -description: 'Learn how to deploy R2R' -icon: 'rocket' ---- - -# Deploying R2R - -R2R (RAG to Riches) is designed to be flexible and scalable, allowing deployment in various environments. This guide provides an overview of deployment options and resource recommendations to help you get started with R2R in a production setting. - -## Deployment Options - -1. **Local Docker or Local Build**: Ideal for development and testing. 
[Start here](/documentation/installation/overview). -2. **Single Cloud Instance**: Recommended for most small to medium-sized organizations. -3. **Container Orchestration** (Docker Swarm): Suitable for larger organizations or those requiring more granular resource control - -## Resource Recommendations - -When running R2R, we recommend: -- At least 4 vCPU cores -- 8+GB of RAM (16GB preferred) -- 50gb + 4x raw data size (_size of data to be ingested after converting to TXT_) of disk space - -## Deployment Guides - -For detailed, step-by-step instructions on deploying R2R in various environments, please refer to our specific deployment guides: - -- [Local Deployment](/documentation/installation/overview) -- [Azure Deployment](/documentation/deployment/azure) -- [SciPhi Cloud](/documentation/deployment/sciphi/) - -Choose the guide that best fits your infrastructure and scaling needs. Each guide provides specific instructions for setting up R2R in that environment, including necessary configurations and best practices. - -By following these deployment recommendations and configuration guides, you'll be well on your way to leveraging R2R's powerful RAG capabilities in your production environment. diff --git a/docs/documentation/deployment/sciphi.mdx b/docs/documentation/deployment/sciphi.mdx deleted file mode 100644 index 8529c8bae..000000000 --- a/docs/documentation/deployment/sciphi.mdx +++ /dev/null @@ -1,27 +0,0 @@ ---- -title: 'SciPhi Enterprise' -description: 'Fully managed R2R for enterprises' -icon: 'building' ---- - -# SciPhi Enterprise: Fully Managed R2R for Your Organization - -SciPhi offers a fully managed, enterprise-grade solution for deploying and scaling R2R (RAG to Riches) within your organization. The SciPhi Enterprise offering provides all the benefits of R2R, including multimodal ingestion, hybrid search, GraphRAG, user management, and observability, in a hassle-free, scalable environment. - -## Why Use SciPhi Enterprise? - -- **Fully Managed**: SciPhi handles the infrastructure, deployment, scaling, updates, and maintenance of R2R, so your team can focus on building RAG applications. -- **Scalable**: Seamlessly scale your R2R deployment to handle growing user bases, document collections, and query volumes. -- **Secure**: Benefit from enterprise-level security, compliance, and data privacy measures. -- **Customizable**: Tailor your R2R deployment to your organization's specific requirements and integrate with your existing systems and workflows. -- **Expert Support**: Get direct access to the SciPhi team for guidance, troubleshooting, and best practices. - -## Getting Started - -Getting started with SciPhi Enterprise is easy: - -1. **Contact SciPhi**: Reach out to their team at [founders@sciphi.ai](mailto:founders@sciphi.ai) to discuss your organization's RAG application needs and learn more about SciPhi Enterprise. -2. **Discovery**: SciPhi's experts will work with you to understand your requirements, existing systems, and goals for R2R within your organization. -3. **Deployment**: SciPhi will handle the deployment and configuration of R2R in your environment, whether cloud-based or on-premises, and integrate with your existing systems and workflows. -4. **Onboarding**: SciPhi will provide training and support to help your developers and users get up and running with R2R quickly and effectively. -5. **Ongoing Support**: SciPhi Enterprise provides ongoing support, updates, and guidance as you scale and evolve your RAG applications. 
diff --git a/docs/documentation/deployment/troubleshooting/api_connections.mdx b/docs/documentation/deployment/troubleshooting/api_connections.mdx deleted file mode 100644 index edaecd0dd..000000000 --- a/docs/documentation/deployment/troubleshooting/api_connections.mdx +++ /dev/null @@ -1,146 +0,0 @@ -# Troubleshooting Guide: R2R API Endpoints Not Responding - -When R2R API endpoints fail to respond, it can disrupt your entire workflow. This guide will help you diagnose and resolve issues related to unresponsive API endpoints. - -## 1. Verify API Service Status - -First, ensure that the R2R API service is running: - -```bash -docker ps | grep r2r -``` - -Look for a container with "r2r" in its name and check its status. - -## 2. Check API Logs - -Examine the logs of the R2R API container: - -```bash -docker logs -``` - -Look for error messages or exceptions that might indicate why the API is not responding. - -## 3. Common Issues and Solutions - -### 3.1 Network Connectivity - -**Symptom:** Unable to reach the API from outside the Docker network. - -**Solutions:** -- Verify port mappings in your Docker Compose file. -- Ensure the host firewall isn't blocking the API port. -- Check if the API is bound to the correct network interface (0.0.0.0 for all interfaces). - -### 3.2 Dependencies Not Ready - -**Symptom:** API starts but immediately exits or fails to initialize. - -**Solutions:** -- Verify that all required services (Postgres, etc.) are up and healthy. -- Check if the `depends_on` conditions in the Docker Compose file are correct. -- Increase the retry count or add a delay in the API service startup script. - -### 3.3 Configuration Errors - -**Symptom:** API logs show configuration-related errors. - -**Solutions:** -- Double-check environment variables in the Docker Compose file. -- Verify that the config file (if used) is correctly mounted and accessible. -- Ensure all required configuration parameters are set. - -### 3.4 Resource Constraints - -**Symptom:** API becomes unresponsive under load or fails to start due to lack of resources. - -**Solutions:** -- Check Docker host resources (CPU, memory, disk space). -- Adjust resource limits in Docker Compose file if necessary. -- Consider scaling the API service or upgrading the host machine. - -### 3.5 Database Connection Issues - -**Symptom:** API logs show database connection errors. - -**Solutions:** -- Verify database credentials and connection strings. -- Check if the database service is running and accessible. -- Ensure the database is initialized with the correct schema. - -### 3.6 Hatchet Integration Problems - -**Symptom:** API fails to communicate with Hatchet service. - -**Solutions:** -- Verify Hatchet service is running and healthy. -- Check Hatchet API key and configuration. -- Ensure network connectivity between R2R and Hatchet services. - -## 4. API-specific Debugging Steps - -1. **Test individual endpoints:** - Use tools like cURL or Postman to test specific endpoints and isolate the problem. - - ```bash - curl http://localhost:7272/v2/health - ``` - -2. **Check API documentation:** - Verify that you're using the correct endpoint URLs and request formats. - -3. **Monitor API metrics:** - If available, check API metrics for response times, error rates, and request volumes. - -4. **Verify API versioning:** - Ensure you're using the correct API version in your requests. - -## 5. 
Advanced Troubleshooting
-
-### 5.1 Network Debugging
-
-Use network debugging tools to diagnose connectivity issues:
-
-```bash
-docker network inspect r2r-network
-```
-
-### 5.2 Interactive Debugging
-
-Access the R2R container interactively to run diagnostics:
-
-```bash
-docker exec -it <r2r-container-id> /bin/bash
-```
-
-### 5.3 API Server Logs
-
-If the API uses a separate web server (e.g., uvicorn), check its logs:
-
-```bash
-docker exec <r2r-container-id> cat /path/to/uvicorn.log
-```
-
-## 6. Preventive Measures
-
-1. Implement robust health checks in your Docker Compose file.
-2. Use logging and monitoring tools to proactively detect issues.
-3. Implement circuit breakers and fallback mechanisms in your application.
-4. Regularly update R2R and its dependencies to the latest stable versions.
-
-## 7. Seeking Help
-
-If you're still experiencing issues:
-
-1. Gather all relevant logs, configurations, and error messages.
-2. Check the R2R documentation and GitHub issues for similar problems.
-3. Reach out to the R2R community on Discord or GitHub for support.
-4. When reporting an issue, provide:
-   - R2R version
-   - Docker and Docker Compose versions
-   - Host OS and version
-   - Detailed description of the problem and steps to reproduce
-   - Relevant logs and configuration files (with sensitive information redacted)
-
-By following this guide, you should be able to diagnose and resolve most issues with unresponsive R2R API endpoints. Remember to approach the problem systematically and gather as much information as possible before seeking external help.
diff --git a/docs/documentation/deployment/troubleshooting/api_responses.mdx b/docs/documentation/deployment/troubleshooting/api_responses.mdx
deleted file mode 100644
index 03cd0e1f2..000000000
--- a/docs/documentation/deployment/troubleshooting/api_responses.mdx
+++ /dev/null
@@ -1,151 +0,0 @@
-# Troubleshooting Guide: Unexpected R2R API Responses
-
-When working with the R2R API, you might encounter responses that don't match your expectations. This guide will help you diagnose and resolve common issues related to unexpected API behavior.
-
-## 1. Verify API Endpoint and Request
-
-First, ensure you're using the correct API endpoint and making the proper request.
-
-### 1.1 Check API URL
-
-- Confirm you're using the correct base URL for your R2R instance.
-- Verify the endpoint path is correct (e.g., `/v2/search` for search requests).
-
-### 1.2 Review Request Method
-
-- Ensure you're using the appropriate HTTP method (GET, POST, PUT, DELETE) for the endpoint.
-
-### 1.3 Validate Request Headers
-
-- Check that you've included all required headers, especially:
-  - `Content-Type: application/json` for POST requests
-  - Authorization header if required
-
-### 1.4 Inspect Request Body
-
-- For POST requests, verify the JSON structure of your request body.
-- Ensure all required fields are present and correctly formatted.
-
-## 2. Common Unexpected Responses
-
-### 2.1 Empty Results
-
-**Symptom:** API returns an empty list or no results when you expect data.
-
-**Possible Causes:**
-- No matching data in the database
-- Incorrect search parameters
-- Issues with data ingestion
-
-**Troubleshooting Steps:**
-1. Verify your search query or filter parameters.
-2. Check if the data you're expecting has been successfully ingested.
-3. Try a broader search to see if any results are returned.
-
-### 2.2 Incorrect Data Format
-
-**Symptom:** API returns data in an unexpected format or structure.
- -**Possible Causes:** -- Changes in API version -- Misconfiguration in R2R settings - -**Troubleshooting Steps:** -1. Check the API documentation for the correct response format. -2. Verify you're using the latest version of the API. -3. Review your R2R configuration settings. - -### 2.3 Unexpected Error Responses - -**Symptom:** API returns error codes or messages you don't expect. - -**Possible Causes:** -- Server-side issues -- Rate limiting -- Authorization problems - -**Troubleshooting Steps:** -1. Check the error message for specific details. -2. Verify your API key or authentication token. -3. Ensure you're not exceeding rate limits. -4. Check R2R server logs for more information. - -## 3. Debugging Techniques - -### 3.1 Use Verbose Logging - -Enable verbose logging in your API requests to get more detailed information: - -```python -import requests - -response = requests.get('http://your-r2r-api-url/v2/endpoint', - params={'verbose': True}) -print(response.json()) -``` - -### 3.2 Check R2R Server Logs - -Access the R2R server logs to look for any error messages or warnings: - -```bash -docker logs r2r-container-name -``` - -### 3.3 Use API Testing Tools - -Utilize tools like Postman or cURL to isolate API issues: - -```bash -curl -X GET "http://your-r2r-api-url/v2/health" -H "accept: application/json" -``` - -## 4. Common Issues and Solutions - -### 4.1 Inconsistent Search Results - -**Issue:** Search results vary unexpectedly between requests. - -**Solution:** -- Check if your data is being updated concurrently. -- Verify the consistency of your vector database (Postgres+pgvector). -- Ensure your search parameters are consistent. - -### 4.2 Slow API Response Times - -**Issue:** API requests take longer than expected to return results. - -**Solution:** -- Check the size of your dataset and consider optimization. -- Verify the performance of your database queries. -- Consider scaling your R2R deployment if dealing with large datasets. - -### 4.3 Unexpected Data Relationships - -**Issue:** API returns relationships between data that don't match expectations. - -**Solution:** -- Review your knowledge graph structure. -- Check the logic in your data ingestion and relationship creation processes. -- Verify that your query is correctly traversing the graph. - -## 5. When to Seek Further Help - -If you've gone through this guide and are still experiencing issues: - -1. Gather all relevant information: - - API request details (endpoint, method, headers, body) - - Full response (including headers and body) - - R2R server logs - - Any error messages or unexpected behavior - -2. Check the R2R documentation and community forums for similar issues. - -3. If the problem persists, consider reaching out to the R2R community or support channels: - - Post a detailed question on the R2R GitHub repository - - Join the R2R Discord community for real-time assistance - - Contact R2R support if you have a support agreement - -Remember to always provide as much context as possible when seeking help, including your R2R version, deployment method, and steps to reproduce the issue. - -By following this guide, you should be able to diagnose and resolve most unexpected API response issues in your R2R deployment. If you encounter persistent problems, don't hesitate to seek help from the R2R community or support channels. 
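When reporting an issue, a reproducible request is the most useful context you can attach. One way to capture both the response body and the HTTP status code in a single command (a minimal sketch using the health endpoint; substitute your own base URL and endpoint):

```bash
# Print the response body, then the HTTP status code on its own line
curl -s -w '\nHTTP status: %{http_code}\n' \
  -H 'Content-Type: application/json' \
  http://localhost:7272/v2/health
```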
diff --git a/docs/documentation/deployment/troubleshooting/bad_configuration.mdx b/docs/documentation/deployment/troubleshooting/bad_configuration.mdx deleted file mode 100644 index 0dc738374..000000000 --- a/docs/documentation/deployment/troubleshooting/bad_configuration.mdx +++ /dev/null @@ -1,164 +0,0 @@ -# Troubleshooting Guide: Incompatible Configuration Settings in R2R - -When working with R2R (RAG to Riches), you may encounter issues related to incompatible configuration settings. This guide will help you identify and resolve common configuration conflicts. - -## 1. Identifying Configuration Issues - -Configuration issues often manifest as error messages during startup or unexpected behavior during runtime. Look for error messages related to configuration in your logs or console output. - -## 2. Common Incompatible Configurations - -### 2.1 Database Conflicts - -**Issue:** Conflicting database settings between different components. - -**Symptoms:** -- Error messages mentioning database connection failures -- Inconsistent data retrieval or storage - -**Resolution:** -1. Check your `r2r.toml` file for database settings. -2. Ensure all components (R2R, Hatchet, etc.) use the same database credentials. -3. Verify that the database URL, port, and name are consistent across all configurations. - -Example correction: -```toml -[database] -url = "postgres://user:password@localhost:5432/r2r_db" -``` - -### 2.2 LLM Provider Conflicts - -**Issue:** Multiple or incompatible LLM provider settings. - -**Symptoms:** -- Errors about undefined LLM providers -- Unexpected LLM behavior or responses - -**Resolution:** -1. Review your LLM provider settings in the configuration. -2. Ensure only one primary LLM provider is active. -3. Check that API keys and endpoints are correctly set for the chosen provider. - -Example correction: -```toml -[llm_providers] -primary = "openai" -[llm_providers.openai] -api_key = "your-openai-api-key" -``` - -### 2.3 Vector Store Misconfigurations - -**Issue:** Incompatible vector store settings. - -**Symptoms:** -- Errors related to vector operations or storage -- Failure to store or retrieve embeddings - -**Resolution:** -1. Verify that your chosen vector store (e.g., pgvector) is properly configured. -2. Ensure the vector store settings match your database configuration. -3. Check for any conflicting dimension settings in your embeddings configuration. - -Example correction: -```toml -[vector_store] -type = "pgvector" -dimension = 1536 # Must match your embedding model's output dimension -``` - -### 2.4 Hatchet Orchestration Conflicts - -**Issue:** Misconfigured Hatchet settings leading to orchestration failures. - -**Symptoms:** -- Errors in task queuing or execution -- Hatchet service failing to start or communicate - -**Resolution:** -1. Check Hatchet-related environment variables and configuration. -2. Ensure Hatchet API key and endpoint are correctly set. -3. Verify RabbitMQ settings if used for task queuing. - -Example correction: -```toml -[orchestration] -type = "hatchet" -api_key = "your-hatchet-api-key" -endpoint = "http://localhost:7077" -``` - -### 2.5 File Path and Permission Issues - -**Issue:** Incorrect file paths or insufficient permissions. - -**Symptoms:** -- Errors about missing files or directories -- Permission denied errors when accessing resources - -**Resolution:** -1. Verify all file paths in your configuration are correct and accessible. -2. Check permissions on directories used by R2R, especially in Docker environments. -3. 
Ensure consistency between host and container paths if using Docker. - -Example correction: -```toml -[file_storage] -base_path = "/app/data" # Ensure this path exists and is writable -``` - -## 3. Configuration Validation Steps - -1. **Use R2R's built-in validation:** - Run `r2r validate-config` to check for basic configuration errors. - -2. **Environment variable check:** - Ensure all required environment variables are set and not conflicting with configuration file settings. - -3. **Docker configuration:** - If using Docker, verify that your `docker-compose.yml` file correctly maps volumes and sets environment variables. - -4. **Component version compatibility:** - Ensure all components (R2R, database, vector store, LLM providers) are using compatible versions. - -## 4. Advanced Troubleshooting - -### 4.1 Configuration Debugging Mode - -Enable debug logging to get more detailed information about configuration loading: - -```toml -[logging] -level = "DEBUG" -``` - -### 4.2 Component Isolation - -If you're unsure which component is causing the issue, try running components in isolation: - -1. Start only the database and vector store. -2. Add the R2R core service. -3. Gradually add other services (Hatchet, LLM providers) one by one. - -This approach can help identify which specific component or interaction is causing the incompatibility. - -### 4.3 Configuration Diff Tool - -Use a diff tool to compare your current configuration with a known working configuration or the default template. This can help spot unintended changes or typos. - -## 5. Seeking Further Assistance - -If you're still experiencing issues after trying these solutions: - -1. Check the R2R documentation for any recent changes or known issues. -2. Search the R2R GitHub issues for similar problems and solutions. -3. Prepare a detailed description of your issue, including: - - Your full R2R configuration (with sensitive information redacted) - - Error messages and logs - - Steps to reproduce the issue -4. Reach out to the R2R community on Discord or file a GitHub issue with the prepared information. - -Remember, when sharing configurations or logs, always remove sensitive information like API keys or passwords. - -By following this guide, you should be able to identify and resolve most incompatible configuration settings in your R2R setup. If problems persist, don't hesitate to seek help from the R2R community or support channels. diff --git a/docs/documentation/deployment/troubleshooting/connection_strings.mdx b/docs/documentation/deployment/troubleshooting/connection_strings.mdx deleted file mode 100644 index 897842e9c..000000000 --- a/docs/documentation/deployment/troubleshooting/connection_strings.mdx +++ /dev/null @@ -1,169 +0,0 @@ -# Troubleshooting Guide: Connection String Errors in R2R Deployments - -Connection string errors can occur when R2R is unable to establish a connection with a database or service. This guide will help you diagnose and resolve common connection string issues. - -## 1. Identify the Error - -First, locate the specific error message in your logs. Common connection string errors include: - -- "Unable to connect to [service]" -- "Connection refused" -- "Authentication failed" -- "Invalid connection string" - -## 2. Common Issues and Solutions - -### 2.1 Incorrect Host or Port - -**Symptom:** Error messages mentioning "host not found" or "connection refused" - -**Possible Causes:** -- Typo in hostname or IP address -- Wrong port number -- Firewall blocking the connection - -**Solutions:** -1. 
Double-check the hostname/IP and port in your connection string -2. Verify the service is running on the specified port -3. Check firewall rules and ensure the port is open - -Example fix for PostgreSQL: -``` -# Before -DATABASE_URL=postgres://user:password@wronghost:5432/dbname - -# After -DATABASE_URL=postgres://user:password@correcthost:5432/dbname -``` - -### 2.2 Authentication Failures - -**Symptom:** Errors like "authentication failed" or "access denied" - -**Possible Causes:** -- Incorrect username or password -- User lacks necessary permissions - -**Solutions:** -1. Verify username and password are correct -2. Ensure the user has the required permissions on the database - -### 2.3 Invalid Connection String Format - -**Symptom:** Errors mentioning "invalid connection string" or specific syntax errors - -**Possible Causes:** -- Malformed connection string -- Missing required parameters - -**Solutions:** -1. Check the connection string format for the specific service -2. Ensure all required parameters are included - -Example fix for a generic connection string: -``` -# Before (missing password) -CONNECTION_STRING=Service=MyService;User ID=myuser;Server=myserver - -# After -CONNECTION_STRING=Service=MyService;User ID=myuser;Password=mypassword;Server=myserver -``` - -### 2.4 SSL/TLS Issues - -**Symptom:** Errors related to SSL handshake or certificate validation - -**Possible Causes:** -- SSL/TLS not properly configured -- Invalid or expired certificates - -**Solutions:** -1. Ensure SSL/TLS is correctly set up on both client and server -2. Update expired certificates -3. If testing, you may temporarily disable SSL (not recommended for production) - -Example fix for PostgreSQL with SSL: -``` -# Before (SSL enforced) -DATABASE_URL=postgres://user:password@host:5432/dbname?sslmode=require - -# After (disable SSL for testing only) -DATABASE_URL=postgres://user:password@host:5432/dbname?sslmode=disable -``` - -### 2.5 Database Not Found - -**Symptom:** Errors like "database does not exist" or "unknown database" - -**Possible Causes:** -- Typo in database name -- Database hasn't been created - -**Solutions:** -1. Verify the database name is correct -2. Ensure the database exists on the server - -Example fix: -``` -# Before -POSTGRES_DBNAME=wrongdbname - -# After -POSTGRES_DBNAME=correctdbname -``` - -## 3. Environment-Specific Troubleshooting - -### 3.1 Docker Environment - -If you're using Docker: - -1. Check if the service containers are running: - ``` - docker ps - ``` -2. Inspect the network to ensure services are on the same network: - ``` - docker network inspect r2r-network - ``` -3. Use Docker's DNS for hostnames (e.g., use `postgres` instead of `localhost` if `postgres` is the service name) - -### 3.2 Cloud Environments - -For cloud deployments: - -1. Verify that the database service is in the same region/zone as your application -2. Check VPC and subnet configurations -3. Ensure necessary firewall rules or security groups are set up correctly - -## 4. Debugging Steps - -1. **Test the connection independently:** - Use command-line tools to test the connection outside of R2R: - - For PostgreSQL: `psql -h -U -d ` - -2. **Check service logs:** - Examine logs of the service you're trying to connect to for any error messages or access attempts. - -3. **Use connection string builders:** - Many database providers offer online tools to help construct valid connection strings. - -## 5. Prevention and Best Practices - -1. 
Use environment variables for sensitive information in connection strings -2. Implement connection pooling to manage connections efficiently -3. Set up proper logging to quickly identify connection issues -4. Use secret management services for storing and retrieving connection credentials securely - -## 6. Seeking Further Help - -If you're still encountering issues: - -1. Check R2R documentation for specific connection string requirements -2. Consult the documentation of the specific database or service you're connecting to -3. Search or ask for help in R2R community forums or support channels -4. Provide detailed error messages and environment information when seeking help - -Remember to never share actual passwords or sensitive information when asking for help. Always use placeholders in examples. - -By following this guide, you should be able to resolve most connection string errors in your R2R deployment. If problems persist, don't hesitate to seek help from the R2R community or support team. diff --git a/docs/documentation/deployment/troubleshooting/database.mdx b/docs/documentation/deployment/troubleshooting/database.mdx deleted file mode 100644 index e8cee37ab..000000000 --- a/docs/documentation/deployment/troubleshooting/database.mdx +++ /dev/null @@ -1,131 +0,0 @@ -# Troubleshooting Guide: Database Connection Failures in R2R - -Database connection issues can significantly impact the functionality of your R2R deployment. This guide will help you diagnose and resolve common database connection problems for both Postgres. - -## 1. General Troubleshooting Steps - -Before diving into database-specific issues, try these general troubleshooting steps: - -1. **Check Database Service Status**: Ensure the database service is running. - ```bash - docker ps | grep postgres - ``` - -2. **Verify Network Connectivity**: Ensure the R2R service can reach the database. - ```bash - docker exec r2r-container ping postgres - ``` - -3. **Check Logs**: Examine R2R and database container logs for error messages. - ```bash - docker logs r2r-container - docker logs postgres-container - ``` - -4. **Verify Environment Variables**: Ensure all necessary environment variables are correctly set in your Docker Compose file or deployment configuration. - -## 2. Postgres Connection Issues - -### 2.1 Common Postgres Error Messages - -- "FATAL: password authentication failed for user" -- "FATAL: database does not exist" -- "could not connect to server: Connection refused" - -### 2.2 Troubleshooting Steps for Postgres - -1. **Check Postgres Connection String**: - - Verify the `POSTGRES_*` environment variables in your R2R configuration. - - Ensure the host, port, username, password, and database name are correct. - -2. **Test Postgres Connection**: - ```bash - docker exec postgres-container psql -U your_username -d your_database -c "SELECT 1;" - ``` - -3. **Check Postgres Logs**: - ```bash - docker logs postgres-container - ``` - -4. **Verify Postgres User and Database**: - ```bash - docker exec postgres-container psql -U postgres -c "\du" - docker exec postgres-container psql -U postgres -c "\l" - ``` - -5. **Check Postgres Network Settings**: - - Ensure Postgres is configured to accept connections from other containers. - - Verify the `pg_hba.conf` file allows connections from the R2R container's IP range. - -### 2.3 Common Solutions for Postgres Issues - -- Update the Postgres connection string in R2R configuration. -- Recreate the Postgres user or database if they're missing. 
-- Modify Postgres network settings to allow connections from R2R. - -## 3. Advanced Troubleshooting - -### 3.1 Database Container Health Checks - -Ensure your Docker Compose file includes proper health checks for database services: - -```yaml -healthcheck: - test: ["CMD-SHELL", "pg_isready -U postgres"] - interval: 10s - timeout: 5s - retries: 5 -``` - -### 3.2 Network Debugging - -If network issues persist: - -1. Inspect the Docker network: - ```bash - docker network inspect r2r-network - ``` - -2. Use network debugging tools within containers: - ```bash - docker exec r2r-container netstat -tuln - docker exec postgres-container netstat -tuln - ``` - -### 3.3 Volume Permissions - -Check if volume permissions are causing issues: - -1. Inspect volume permissions: - ```bash - docker exec postgres-container ls -l /var/lib/postgresql/data - ``` - -2. Adjust permissions if necessary: - ```bash - docker exec postgres-container chown -R postgres:postgres /var/lib/postgresql/data - ``` - -## 4. Preventive Measures - -To avoid future database connection issues: - -1. Use Docker secrets or environment files for sensitive information. -2. Implement retry logic in your application for database connections. -3. Set up monitoring and alerting for database health and connectivity. -4. Regularly backup your database and test restoration procedures. - -## 5. Seeking Further Help - -If you're still experiencing issues: - -1. Gather all relevant logs and configuration files. -2. Check R2R documentation and community forums. -3. Consider posting a detailed question on the R2R GitHub repository or community channels, providing: - - Docker Compose file (with sensitive information redacted) - - R2R and database version information - - Detailed error messages and logs - - Steps to reproduce the issue - -By following this guide, you should be able to diagnose and resolve most database connection issues in your R2R deployment. Remember to always keep your database and R2R versions compatible and up to date. diff --git a/docs/documentation/deployment/troubleshooting/dependencies.mdx b/docs/documentation/deployment/troubleshooting/dependencies.mdx deleted file mode 100644 index 9c8ad2ea7..000000000 --- a/docs/documentation/deployment/troubleshooting/dependencies.mdx +++ /dev/null @@ -1,171 +0,0 @@ -# R2R Troubleshooting Guide: Dependency Conflicts - -Dependency conflicts can occur when different components of the R2R system require incompatible versions of the same library or when there are conflicts between system-level dependencies. This guide will help you identify and resolve common dependency issues. - -## 1. Identifying Dependency Conflicts - -### Symptoms: -- Error messages mentioning version conflicts -- Unexpected behavior in specific components -- Installation or startup failures - -### Common Error Messages: -- "ImportError: cannot import name X from Y" -- "AttributeError: module X has no attribute Y" -- "ModuleNotFoundError: No module named X" - -## 2. Python Package Conflicts - -### 2.1 Diagnosing the Issue - -1. Check your Python environment: - ```bash - python --version - pip list - ``` - -2. Look for conflicting versions in the pip list output. - -3. Use `pip check` to identify dependency conflicts: - ```bash - pip check - ``` - -### 2.2 Resolving Python Package Conflicts - -1. **Update R2R and its dependencies:** - ```bash - pip install --upgrade r2r[core] - ``` - -2. 
**Use a virtual environment:** - ```bash - python -m venv r2r_env - source r2r_env/bin/activate # On Windows, use r2r_env\Scripts\activate - pip install r2r[core] - ``` - -3. **Manually resolve conflicts:** - - Identify the conflicting packages - - Upgrade or downgrade specific packages: - ```bash - pip install package_name==specific_version - ``` - -4. **Use `pip-compile` for deterministic builds:** - ```bash - pip install pip-tools - pip-compile requirements.in - pip-sync requirements.txt - ``` - -## 3. System-level Dependency Conflicts - -### 3.1 Diagnosing System Conflicts - -1. Check system library versions: - ```bash - ldd --version - ldconfig -p | grep library_name - ``` - -2. Look for error messages related to shared libraries: - - "error while loading shared libraries" - - "symbol lookup error" - -### 3.2 Resolving System-level Conflicts - -1. **Update system packages:** - ```bash - sudo apt update && sudo apt upgrade # For Ubuntu/Debian - sudo yum update # For CentOS/RHEL - ``` - -2. **Install missing libraries:** - ```bash - sudo apt install library_name # For Ubuntu/Debian - sudo yum install library_name # For CentOS/RHEL - ``` - -3. **Use container isolation:** - - Consider using Docker to isolate the R2R environment from the host system. - -## 4. Docker-specific Dependency Issues - -### 4.1 Diagnosing Docker Issues - -1. Check Docker image versions: - ```bash - docker images - ``` - -2. Inspect Docker logs: - ```bash - docker logs container_name - ``` - -### 4.2 Resolving Docker Dependency Conflicts - -1. **Update Docker images:** - ```bash - docker pull ragtoriches/prod:main-unstructured - ``` - -2. **Rebuild with no cache:** - ```bash - docker-compose build --no-cache - ``` - -3. **Check Docker Compose file:** - - Ensure all services are using compatible versions - - Update service versions if necessary - - -## 6. Advanced Troubleshooting - -### 6.1 Use Dependency Visualization Tools - -1. Install `pipdeptree`: - ```bash - pip install pipdeptree - ``` - -2. Visualize dependencies: - ```bash - pipdeptree -p r2r - ``` - -### 6.2 Analyze Startup Sequences - -1. Use verbose logging: - ```bash - R2R_LOG_LEVEL=DEBUG r2r serve - ``` - -2. Analyze logs for import errors or version conflicts - -### 6.3 Temporary Workarounds - -1. **Pin problematic dependencies:** - - Create a `constraints.txt` file with specific versions - - Install with constraints: - ```bash - pip install -c constraints.txt r2r[core] - ``` - -2. **Use compatibility mode:** - - If available, run R2R with a compatibility flag to use older versions of certain components - -## 7. Seeking Further Help - -If you've tried these steps and still encounter issues: - -1. Check the [R2R GitHub Issues](https://github.com/SciPhi-AI/R2R/issues) for similar problems and solutions -2. Join the [R2R Discord community](https://discord.gg/p6KqD2kjtB) for real-time support -3. Open a new issue on GitHub with: - - Detailed description of the problem - - Steps to reproduce - - Environment details (OS, Python version, pip list output) - - Relevant log snippets - -Remember, when dealing with dependency conflicts, it's crucial to document your changes and test thoroughly after each modification to ensure you haven't introduced new issues while solving existing ones. 
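One way to do that final check after each change (a sketch assuming a host with `python3-venv` available) is to rebuild a clean environment and confirm the resolver reports no remaining conflicts:

```bash
# Recreate a throwaway environment and verify the installed set is consistent
python3 -m venv /tmp/r2r_dep_check
source /tmp/r2r_dep_check/bin/activate
pip install --quiet --upgrade pip
pip install --quiet 'r2r[core]'
pip check && echo "No dependency conflicts detected"
```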
diff --git a/docs/documentation/deployment/troubleshooting/docker.mdx b/docs/documentation/deployment/troubleshooting/docker.mdx deleted file mode 100644 index 07002f39e..000000000 --- a/docs/documentation/deployment/troubleshooting/docker.mdx +++ /dev/null @@ -1,154 +0,0 @@ -# Troubleshooting Guide: Docker Containers Failing to Start - -When Docker containers fail to start, it can be frustrating and halt your development process. This guide will help you diagnose and resolve common issues. - -## 1. Check Container Status - -First, check the status of your containers: - -```bash -docker ps -a -``` - -Look for containers with a status of "Exited" or "Created" but not "Up". - -## 2. View Container Logs - -For containers that failed to start, view the logs: - -```bash -docker logs -``` - -Look for error messages that might indicate why the container failed to start. - -## 3. Common Issues and Solutions - -### 3.1 Port Conflicts - -**Symptom:** Error message about ports already in use. - -**Solution:** -- Check if the port is already in use by another process: - ```bash - sudo lsof -i : - ``` -- Change the port mapping in your Docker run command or docker-compose file. - -### 3.2 Missing Environment Variables - -**Symptom:** Container exits immediately, logs show errors about missing environment variables. - -**Solution:** -- Ensure all required environment variables are set in your Docker run command or docker-compose file. -- Double-check your .env file if you're using one. - -### 3.3 Insufficient System Resources - -**Symptom:** Container fails to start, logs mention memory or CPU limits. - -**Solution:** -- Check your system resources: - ```bash - docker info - ``` -- Adjust resource limits in Docker settings or in your container configuration. - -### 3.4 Image Not Found - -**Symptom:** Error message about the image not being found. - -**Solution:** -- Ensure the image exists locally or is accessible from the specified registry: - ```bash - docker images - ``` -- Pull the image manually if needed: - ```bash - docker pull : - ``` - -### 3.5 Volume Mount Issues - -**Symptom:** Errors related to volume mounts or missing files. - -**Solution:** -- Verify that the paths for volume mounts exist on the host system. -- Check permissions on the host directories. - -### 3.6 Network Issues - -**Symptom:** Container can't connect to other services or the internet. - -**Solution:** -- Check Docker network settings: - ```bash - docker network ls - docker network inspect - ``` -- Ensure the container is connected to the correct network. - -## 4. Debugging Steps - -If the above solutions don't resolve the issue: - -1. **Run the container in interactive mode:** - ```bash - docker run -it --entrypoint /bin/bash - ``` - This allows you to explore the container environment. - -2. **Check container health:** - If your container has a HEALTHCHECK instruction, review its status: - ```bash - docker inspect --format='{{json .State.Health}}' - ``` - -3. **Review Dockerfile:** - Ensure your Dockerfile is correctly configured, especially the CMD or ENTRYPOINT instructions. - -4. **Verify dependencies:** - Make sure all required dependencies are installed and correctly configured in the image. - -## 5. 
Advanced Troubleshooting - -### 5.1 Docker Daemon Logs - -Check Docker daemon logs for system-level issues: - -```bash -sudo journalctl -u docker.service -``` - -### 5.2 Docker Events - -Monitor Docker events in real-time: - -```bash -docker events -``` - -### 5.3 Resource Constraints - -Review and adjust resource constraints: - -```bash -docker update --cpu-shares 512 --memory 512M -``` - -## 6. Seeking Help - -If you're still stuck: - -1. Gather all relevant logs and configuration files. -2. Check Docker documentation and community forums. -3. If using a specific service (like R2R), consult their support channels or documentation. -4. Consider posting a detailed question on Stack Overflow or Docker community forums. - -Remember to always provide: -- Docker version (`docker version`) -- Host OS and version -- Detailed error messages -- Steps to reproduce the issue - -By following this guide, you should be able to diagnose and resolve most issues with Docker containers failing to start. If problems persist, don't hesitate to seek help from the Docker community or relevant support channels. diff --git a/docs/documentation/deployment/troubleshooting/environment.mdx b/docs/documentation/deployment/troubleshooting/environment.mdx deleted file mode 100644 index 838963ec7..000000000 --- a/docs/documentation/deployment/troubleshooting/environment.mdx +++ /dev/null @@ -1,134 +0,0 @@ -# R2R Troubleshooting Guide: Missing Environment Variables - -When deploying R2R, missing environment variables can cause containers to fail to start or lead to unexpected behavior. This guide will help you identify and resolve issues related to missing environment variables. - -## 1. Identifying the Problem - -Signs that you might be dealing with missing environment variables: - -- Containers exit immediately after starting -- Error messages in logs mentioning undefined or null values -- Specific features or integrations not working as expected - -## 2. Common Missing Environment Variables - -Here are some critical environment variables for R2R: - -- Database credentials (e.g., `R2R_POSTGRES_USER`, `R2R_POSTGRES_PASSWORD`) -- API keys (e.g., `OPENAI_API_KEY`, `ANTHROPIC_API_KEY`) -- Configuration settings (e.g., `R2R_CONFIG_NAME`, `R2R_CONFIG_PATH`) - -## 3. Checking for Missing Variables - -### 3.1 Review Docker Compose File - -1. Open your `docker-compose.yml` file. -2. Look for the `environment` section under the `r2r` service. -3. Ensure all required variables are listed. - -Example: -```yaml -services: - r2r: - environment: - - R2R_POSTGRES_USER=${R2R_POSTGRES_USER:-postgres} - - OPENAI_API_KEY=${OPENAI_API_KEY:-} - - R2R_CONFIG_NAME=${R2R_CONFIG_NAME:-} - # ... other variables -``` - -### 3.2 Check .env File - -1. Ensure you have a `.env` file in the same directory as your `docker-compose.yml`. -2. Verify that all variables used in `docker-compose.yml` are defined in `.env`. - -Example `.env` file: -``` -R2R_POSTGRES_USER=myuser -OPENAI_API_KEY=sk-... -R2R_CONFIG_NAME=default -``` - -### 3.3 Verify Environment in Running Container - -If the container starts but behaves unexpectedly: - -1. Access the container's shell: - ``` - docker exec -it r2r-container-name /bin/bash - ``` -2. Check environment variables: - ``` - env | grep VARIABLE_NAME - ``` - -## 4. Resolving Missing Variables - -### 4.1 Update .env File - -1. Add any missing variables to your `.env` file. -2. Ensure values are correct and properly formatted. 
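Before restarting the stack, it can also help to fail fast when a required variable is still missing (a hedged sketch; adjust the variable names to whatever your compose file actually references):

```bash
# Load values from .env into this shell, then abort if any required variable is empty
set -a; source .env; set +a
for var in R2R_POSTGRES_USER R2R_POSTGRES_PASSWORD OPENAI_API_KEY; do
  [ -n "${!var:-}" ] || { echo "Missing required environment variable: $var" >&2; exit 1; }
done
echo "All required variables are set"
```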
- -### 4.2 Use Default Values - -In `docker-compose.yml`, provide default values for non-sensitive variables: - -```yaml -environment: - - VARIABLE_NAME=${VARIABLE_NAME:-default_value} -``` - -### 4.3 Inject Variables at Runtime - -For sensitive data, inject variables when running the container: - -```bash -docker run -e SENSITIVE_VAR=value ... -``` - -Or with docker-compose: - -```bash -SENSITIVE_VAR=value docker-compose up -``` - -### 4.4 Use Docker Secrets - -For enhanced security, consider using Docker secrets for sensitive data: - -1. Create a secret: - ``` - echo "mysecretvalue" | docker secret create my_secret - - ``` - -2. Use in `docker-compose.yml`: - ```yaml - secrets: - - my_secret - ``` - -## 5. Specific R2R Environment Variables - -Ensure these key R2R variables are set: - -- `R2R_CONFIG_NAME` or `R2R_CONFIG_PATH`: Specifies which configuration to use. -- `R2R_POSTGRES_*`: Database connection details. -- `OPENAI_API_KEY`: If using OpenAI services. -- `ANTHROPIC_API_KEY`: If using Anthropic models. -- `OLLAMA_API_BASE`: For local LLM integration. -- `HATCHET_CLIENT_TOKEN`: For Hatchet integration. - -## 6. Debugging Tips - -- Use `docker-compose config` to see the final composed configuration with resolved variables. -- Temporarily echo sensitive variables in your container's entrypoint script for debugging (remove in production). -- Check for typos in variable names both in `docker-compose.yml` and `.env` files. - -## 7. Best Practices - -- Use version control for your `docker-compose.yml`, but not for `.env` files containing secrets. -- Consider using a secret management service for production deployments. -- Document all required environment variables in your project's README. -- Use CI/CD pipelines to validate the presence of required variables before deployment. - -By following this guide, you should be able to identify and resolve issues related to missing environment variables in your R2R deployment. Remember to always handle sensitive data securely and never commit secrets to version control. diff --git a/docs/documentation/deployment/troubleshooting/firewall.mdx b/docs/documentation/deployment/troubleshooting/firewall.mdx deleted file mode 100644 index 7aa3a887a..000000000 --- a/docs/documentation/deployment/troubleshooting/firewall.mdx +++ /dev/null @@ -1,148 +0,0 @@ -# R2R Security Group and Firewall Configuration Guide - -Proper security group and firewall configuration is crucial for securing your R2R deployment while ensuring necessary services remain accessible. This guide covers configurations for both cloud environments and local deployments. - -## Cloud Environments (AWS, Azure, GCP) - -### AWS Security Groups - -1. Create a new security group for your R2R deployment: - -```bash -aws ec2 create-security-group --group-name R2R-SecurityGroup --description "Security group for R2R deployment" -``` - -2. 
Configure inbound rules: - -```bash -# Allow SSH access (restrict to your IP if possible) -aws ec2 authorize-security-group-ingress --group-name R2R-SecurityGroup --protocol tcp --port 22 --cidr 0.0.0.0/0 - -# Allow access to R2R API -aws ec2 authorize-security-group-ingress --group-name R2R-SecurityGroup --protocol tcp --port 7272 --cidr 0.0.0.0/0 - -# Allow access to R2R Dashboard -aws ec2 authorize-security-group-ingress --group-name R2R-SecurityGroup --protocol tcp --port 8001 --cidr 0.0.0.0/0 - -# Allow access to Hatchet Dashboard -aws ec2 authorize-security-group-ingress --group-name R2R-SecurityGroup --protocol tcp --port 8002 --cidr 0.0.0.0/0 -``` - -### Azure Network Security Groups - -1. Create a new Network Security Group: - -```bash -az network nsg create --name R2R-NSG --resource-group YourResourceGroup --location YourLocation -``` - -2. Add inbound security rules: - -```bash -# Allow SSH access -az network nsg rule create --name AllowSSH --nsg-name R2R-NSG --priority 100 --resource-group YourResourceGroup --access Allow --direction Inbound --protocol Tcp --source-address-prefixes '*' --source-port-ranges '*' --destination-address-prefixes '*' --destination-port-ranges 22 - -# Allow R2R API access -az network nsg rule create --name AllowR2RAPI --nsg-name R2R-NSG --priority 200 --resource-group YourResourceGroup --access Allow --direction Inbound --protocol Tcp --source-address-prefixes '*' --source-port-ranges '*' --destination-address-prefixes '*' --destination-port-ranges 7272 - -# Allow R2R Dashboard access -az network nsg rule create --name AllowR2RDashboard --nsg-name R2R-NSG --priority 300 --resource-group YourResourceGroup --access Allow --direction Inbound --protocol Tcp --source-address-prefixes '*' --source-port-ranges '*' --destination-address-prefixes '*' --destination-port-ranges 8001 - -# Allow Hatchet Dashboard access -az network nsg rule create --name AllowHatchetDashboard --nsg-name R2R-NSG --priority 400 --resource-group YourResourceGroup --access Allow --direction Inbound --protocol Tcp --source-address-prefixes '*' --source-port-ranges '*' --destination-address-prefixes '*' --destination-port-ranges 8002 -``` - -### Google Cloud Platform Firewall Rules - -1. Create firewall rules: - -```bash -# Allow SSH access -gcloud compute firewall-rules create allow-ssh --direction=INGRESS --priority=1000 --network=default --action=ALLOW --rules=tcp:22 --source-ranges=0.0.0.0/0 - -# Allow R2R API access -gcloud compute firewall-rules create allow-r2r-api --direction=INGRESS --priority=1000 --network=default --action=ALLOW --rules=tcp:7272 --source-ranges=0.0.0.0/0 - -# Allow R2R Dashboard access -gcloud compute firewall-rules create allow-r2r-dashboard --direction=INGRESS --priority=1000 --network=default --action=ALLOW --rules=tcp:8001 --source-ranges=0.0.0.0/0 - -# Allow Hatchet Dashboard access -gcloud compute firewall-rules create allow-hatchet-dashboard --direction=INGRESS --priority=1000 --network=default --action=ALLOW --rules=tcp:8002 --source-ranges=0.0.0.0/0 -``` - -## Local Deployments - -For local deployments, you'll need to configure your operating system's firewall. 
Here are instructions for common operating systems: - -### Ubuntu/Debian (UFW) - -```bash -# Allow SSH -sudo ufw allow 22/tcp - -# Allow R2R API -sudo ufw allow 7272/tcp - -# Allow R2R Dashboard -sudo ufw allow 8001/tcp - -# Allow Hatchet Dashboard -sudo ufw allow 8002/tcp - -# Enable the firewall -sudo ufw enable -``` - -### CentOS/RHEL (firewalld) - -```bash -# Allow SSH -sudo firewall-cmd --permanent --add-port=22/tcp - -# Allow R2R API -sudo firewall-cmd --permanent --add-port=7272/tcp - -# Allow R2R Dashboard -sudo firewall-cmd --permanent --add-port=8001/tcp - -# Allow Hatchet Dashboard -sudo firewall-cmd --permanent --add-port=8002/tcp - -# Reload firewall -sudo firewall-cmd --reload -``` - -### Windows (Windows Firewall) - -1. Open Windows Defender Firewall with Advanced Security -2. Click on "Inbound Rules" and then "New Rule" -3. Choose "Port" and click "Next" -4. Select "TCP" and enter the specific ports (22, 7272, 8001, 8002) -5. Choose "Allow the connection" and click "Next" -6. Apply the rule to all profiles (Domain, Private, Public) -7. Give the rule a name (e.g., "R2R Ports") and click "Finish" - -## Best Practices - -1. **Least Privilege**: Only open ports that are absolutely necessary. -2. **IP Restrictions**: When possible, restrict access to known IP addresses or ranges. -3. **Use VPN**: For added security, consider using a VPN for accessing administrative interfaces. -4. **Regular Audits**: Periodically review and update your security group and firewall rules. -5. **Monitoring**: Implement logging and monitoring for all allowed ports. -6. **HTTPS**: Use HTTPS for all web interfaces and APIs when possible. - -## Verifying Configuration - -After setting up your firewall rules, verify that the necessary ports are open: - -```bash -# For Linux systems -nmap -p 22,7272,8001,8002,7474 your_server_ip - -# For Windows systems (requires nmap installation) -nmap -p 22,7272,8001,8002,7474 your_server_ip -``` - -This should show the status of each port (open or closed). - -Remember to adjust these configurations based on your specific deployment needs and security requirements. Always follow your organization's security policies and best practices. diff --git a/docs/documentation/deployment/troubleshooting/high_usage.mdx b/docs/documentation/deployment/troubleshooting/high_usage.mdx deleted file mode 100644 index 3159b73e1..000000000 --- a/docs/documentation/deployment/troubleshooting/high_usage.mdx +++ /dev/null @@ -1,162 +0,0 @@ -# Troubleshooting Guide: High Resource Usage in R2R Docker Deployments - -High resource usage in R2R Docker deployments can lead to performance issues, system instability, or even service outages. This guide will help you identify the cause of high resource usage and provide steps to mitigate the problem. - -## 1. Identifying the Issue - -First, determine which resources are being overused: - -### 1.1 Check Overall System Resources - -Use the `top` or `htop` command to get an overview of system resource usage: - -```bash -top -``` - -Look for high CPU usage, memory consumption, or swap usage. - -### 1.2 Check Docker-specific Resource Usage - -Use Docker's built-in commands to check resource usage for containers: - -```bash -docker stats -``` - -This will show CPU, memory, and I/O usage for each container. - -## 2. Common Causes and Solutions - -### 2.1 High CPU Usage - -#### Possible Causes: -- Inefficient queries or data processing -- Continuous background tasks -- Improperly configured LLM inference - -#### Solutions: -1. 
**Optimize queries:** - - Review and optimize database queries, especially those involving large datasets. - -2. **Adjust background task frequency:** - - Review Hatchet workflows and adjust the frequency of recurring tasks. - - Implement rate limiting for resource-intensive operations. - -3. **LLM configuration:** - - If using local LLMs via Ollama, consider adjusting model parameters or switching to a lighter model. - - For cloud LLMs, implement caching to reduce redundant API calls. - -4. **Scale horizontally:** - - Consider distributing the workload across multiple R2R instances. - -### 2.2 High Memory Usage - -#### Possible Causes: -- Memory leaks in custom code -- Inefficient caching -- Large in-memory datasets - -#### Solutions: -1. **Identify memory-hungry containers:** - ```bash - docker stats --format "table {{.Name}}\t{{.MemUsage}}\t{{.MemPerc}}" - ``` - -2. **Analyze R2R application logs:** - - Look for patterns of increasing memory usage over time. - -3. **Optimize memory usage:** - - Implement proper garbage collection in custom code. - - Review and optimize caching strategies. - - Consider using streaming for large data processing tasks instead of loading entire datasets into memory. - -4. **Adjust container memory limits:** - - Update the Docker Compose file to set appropriate memory limits for containers: - ```yaml - services: - r2r: - deploy: - resources: - limits: - memory: 4G - ``` - -### 2.3 High Disk I/O - -#### Possible Causes: -- Frequent logging -- Inefficient database operations -- Large file ingestion processes - -#### Solutions: -1. **Monitor disk I/O:** - ```bash - docker stats --format "table {{.Name}}\t{{.BlockIO}}" - ``` - -2. **Optimize logging:** - - Reduce log verbosity for non-critical information. - - Implement log rotation to manage file sizes. - -3. **Database optimizations:** - - Ensure proper indexing in Postgres. - - Optimize query patterns to reduce full table scans. - -4. **File ingestion improvements:** - - Implement chunking for large file ingestion. - - Consider using a dedicated storage service for large files. - -## 3. Monitoring and Prevention - -Implement proactive monitoring to catch resource issues early: - -1. **Set up Docker monitoring:** - - Use tools like Prometheus and Grafana to monitor Docker metrics. - - Set up alerts for when resource usage exceeds certain thresholds. - -2. **Implement application-level metrics:** - - Use libraries like `prometheus_client` in Python to expose custom metrics. - - Monitor key performance indicators specific to your R2R usage. - -3. **Regular performance audits:** - - Periodically review resource usage patterns. - - Conduct load testing to identify potential bottlenecks before they impact production. - -## 4. Advanced Debugging - -For persistent issues: - -1. **Profile the R2R application:** - - Use Python profiling tools like cProfile or memory_profiler to identify resource-intensive code sections. - -2. **Analyze Docker logs:** - ```bash - docker logs r2r-container-name - ``` - -3. **Inspect container details:** - ```bash - docker inspect r2r-container-name - ``` - -4. **Review orchestration logs:** - - Check Hatchet logs for insights into task execution and resource allocation. - -## 5. Scaling Considerations - -If high resource usage persists despite optimizations: - -1. **Vertical scaling:** - - Increase resources (CPU, RAM) for the Docker host. - -2. **Horizontal scaling:** - - Implement load balancing across multiple R2R instances. 
- - Consider using Docker Swarm or Kubernetes for orchestration. - -3. **Service separation:** - - Move resource-intensive components (e.g., database, LLM inference) to dedicated hosts. - -## Conclusion - -High resource usage in R2R Docker deployments can be challenging but is often resolvable through careful analysis and optimization. Always ensure you have proper monitoring in place, and regularly review your deployment's performance to catch issues early. If problems persist, don't hesitate to reach out to the R2R community or consider consulting with cloud infrastructure experts. diff --git a/docs/documentation/deployment/troubleshooting/incorrect_credentials.mdx b/docs/documentation/deployment/troubleshooting/incorrect_credentials.mdx deleted file mode 100644 index 61c8e6cdf..000000000 --- a/docs/documentation/deployment/troubleshooting/incorrect_credentials.mdx +++ /dev/null @@ -1,129 +0,0 @@ -# R2R Troubleshooting Guide: Incorrect Database Credentials - -Database connectivity issues due to incorrect credentials are a common problem when deploying R2R. This guide will help you identify and resolve these issues. - -## Symptoms - -- Error messages containing phrases like "authentication failed", "access denied", or "connection refused" -- R2R application fails to start or crashes shortly after starting -- Database-related operations fail while other parts of the application seem to work - -## Diagnosis Steps - -1. **Check Error Logs** - - Review R2R application logs for specific error messages - - Look for database-related errors, especially those mentioning authentication or connection issues - -2. **Verify Environment Variables** - - Ensure all database-related environment variables are set correctly - - Common variables include: - ``` - R2R_POSTGRES_USER - R2R_POSTGRES_PASSWORD - R2R_POSTGRES_HOST - R2R_POSTGRES_PORT - R2R_POSTGRES_DBNAME - ``` - -3. **Test Database Connection** - - Use a database client tool to test the connection with the same credentials - - For PostgreSQL, you can use the `psql` command-line tool: - ``` - psql -h -p -U -d - ``` - -4. **Check Database Server Status** - - Ensure the database server is running and accessible from the R2R container - - Verify network connectivity between R2R and the database server - -5. **Inspect Docker Compose File** - - Review the `docker-compose.yml` file to ensure database service configuration is correct - - Check for any discrepancies between the database service and R2R service configurations - -## Resolution Steps - -1. **Correct Environment Variables** - - Update the `.env` file or set environment variables with the correct database credentials - - Ensure these variables are properly passed to the R2R container - -2. **Update Docker Compose File** - - If using Docker Compose, update the `docker-compose.yml` file with the correct database configuration - - Ensure the database service name matches what R2R is expecting (e.g., `postgres`) - -3. **Rebuild and Restart Containers** - - After making changes, rebuild and restart your Docker containers: - ``` - docker-compose down - docker-compose up --build - ``` - -4. **Check Database User Permissions** - - Ensure the database user has the necessary permissions - - For PostgreSQL, you might need to grant permissions: - ```sql - GRANT ALL PRIVILEGES ON DATABASE TO ; - ``` - -5. **Verify Database Existence** - - Ensure the specified database exists - - Create it if necessary: - ```sql - CREATE DATABASE ; - ``` - -6. 
**Check Network Configuration** - - If using Docker, ensure the database and R2R services are on the same network - - Verify firewall rules allow traffic between R2R and the database - -7. **Use Secrets Management** - - Consider using Docker secrets or a secure vault for managing sensitive credentials - - Update your Docker Compose file to use secrets instead of environment variables for passwords - -## Prevention Tips - -1. Use a `.env` file for local development and CI/CD pipelines for production to manage environment variables -2. Implement a health check in your Docker Compose file to ensure the database is ready before starting R2R -3. Use database connection pooling to manage connections efficiently -4. Regularly audit and rotate database credentials -5. Use least privilege principle when setting up database users for R2R - -## Debugging Commands - -Here are some useful commands for debugging database connection issues: - -1. Check if PostgreSQL is running: - ``` - docker-compose ps postgres - ``` - -2. View PostgreSQL logs: - ``` - docker-compose logs postgres - ``` - -3. Check R2R logs: - ``` - docker-compose logs r2r - ``` - -4. Access PostgreSQL CLI within the container: - ``` - docker-compose exec postgres psql -U -d - ``` - -## Seeking Further Help - -If you've tried these steps and are still experiencing issues: - -1. Check the R2R documentation for any specific database setup requirements -2. Review the R2R GitHub issues for similar problems and solutions -3. Reach out to the R2R community on Discord or GitHub for support -4. Provide detailed information about your setup, including: - - R2R version - - Database type and version - - Relevant parts of your Docker Compose file - - Specific error messages you're encountering - -Remember to never share actual passwords or sensitive information when seeking help. Use placeholders instead. - -By following this guide, you should be able to resolve most database credential issues in your R2R deployment. If problems persist, don't hesitate to seek help from the R2R community or support channels. diff --git a/docs/documentation/deployment/troubleshooting/index.mdx b/docs/documentation/deployment/troubleshooting/index.mdx deleted file mode 100644 index 10b5ab02a..000000000 --- a/docs/documentation/deployment/troubleshooting/index.mdx +++ /dev/null @@ -1,73 +0,0 @@ ---- -title: 'Overview' -description: 'Learn how to debug R2R deployments' ---- - -# Troubleshooting - -Have you encountered issues with deploying your R2R system? Have no fear our troubleshooting guide is here. 
- -# R2R Troubleshooting Guide - -## Common Installation Issues - -### Docker Installation -- Issue: [Docker containers fail to start](/documentation/deployment/troubleshooting/docker) -- Issue: [Missing environment variables](/documentation/deployment/troubleshooting/environment) -- Issue: [Insufficient system resources](documentation/deployment/troubleshooting/resources) - -### Local System Installation -- Issue: [Dependency conflicts](/documentation/deployment/troubleshooting/dependencies) -- Issue: [Service configuration](/documentation/deployment/troubleshooting/services) problems (Postgres, Hatchet) -- Issue: [Unstructured.io](/documentation/deployment/troubleshooting/unstructured) setup difficulties - -## Deployment Problems - -### Cloud Deployments -- Issue: [Connection timeouts](/documentation/deployment/troubleshooting/timeouts) -- Issue: [Insufficient instance resources](/documentation/deployment/troubleshooting/insufficient_resources) -- Issue: [Security group / firewall configuration](/documentation/deployment/troubleshooting/firewall) - -### Other -- Issue: [Port conflicts](/documentation/deployment/troubleshooting/port_conflicts) -- Issue: [Database connection failures](/documentation/deployment/troubleshooting/database) -- Issue: [Local LLM integration issues](/documentation/deployment/troubleshooting/local_llm) - -## Runtime Errors - -### API-related Issues -- Issue: [API endpoints not responding](/documentation/deployment/troubleshooting/api_connections) -- Issue: [Unexpected API responses](/documentation/deployment/troubleshooting/api_responses) - -### Performance Issues -- Issue: [Slow query responses](/documentation/deployment/troubleshooting/slow_queries) -- Issue: [High resource usage](/documentation/deployment/troubleshooting/high_usage) - -## Configuration Troubleshooting - -### Environment Variables -- Issue: [Missing or incorrect API keys](/documentation/deployment/troubleshooting/missing_keys) -- Issue: [Incorrect database credentials](/documentation/deployment/troubleshooting/incorrect_credentials) - -### Custom Configurations -- Issue: [TOML file syntax errors](/documentation/deployment/troubleshooting/toml_errors) -- Issue: [Incompatible configuration settings](/documentation/deployment/troubleshooting/bad_configuration) - -## Component-specific Issues - -### Postgres + pgvector -- Issue: [Vector storage problems](/documentation/deployment/troubleshooting/vector_store_issues) -- Issue: [Connection string errors](/documentation/deployment/troubleshooting/connection_strings) - -### Hatchet -- Issue: [Workflow orchestration failures](/documentation/deployment/troubleshooting/workflows) -- Issue: [RabbitMQ connectivity issues](/documentation/deployment/troubleshooting/rabbit_mq) - -## Debugging Tips - -- [How to check R2R logs](/documentation/deployment/troubleshooting/r2r_logs)) - -## Getting Help - -- [Report issues on GitHub](https://github.com/SciPhi-AI/R2R/issues) -- [Joining the Discord community for support](https://discord.gg/p6KqD2kjtB) diff --git a/docs/documentation/deployment/troubleshooting/insufficient_resources.mdx b/docs/documentation/deployment/troubleshooting/insufficient_resources.mdx deleted file mode 100644 index a36ce57bc..000000000 --- a/docs/documentation/deployment/troubleshooting/insufficient_resources.mdx +++ /dev/null @@ -1,129 +0,0 @@ -# R2R Troubleshooting Guide: Insufficient Instance Resources - -When deploying R2R, you may encounter issues related to insufficient instance resources. 
This guide will help you identify, troubleshoot, and resolve these problems. - -## Symptoms of Insufficient Resources - -1. Containers fail to start or crash frequently -2. Slow response times or timeouts -3. Out of memory errors -4. High CPU usage alerts -5. Disk space warnings - -## Diagnosing Resource Issues - -### 1. Check Docker Resource Usage - -```bash -docker stats -``` - -This command shows a live stream of container resource usage statistics. - -### 2. Check Host System Resources - -```bash -top -free -h -df -h -``` - -These commands show CPU, memory, and disk usage respectively. - -### 3. Review Container Logs - -```bash -docker logs -``` - -Look for error messages related to resource constraints. - -## Common Resource-Related Issues and Solutions - -### 1. Insufficient Memory - -**Symptom:** Container exits with out of memory error or host system shows high memory usage. - -**Solution:** -- Increase Docker memory limit: - ```bash - docker run --memory=4g ... - ``` -- For Docker Desktop, increase memory allocation in settings. -- For cloud instances, upgrade to a larger instance type. - -### 2. CPU Constraints - -**Symptom:** High CPU usage, slow response times. - -**Solution:** -- Limit CPU usage for non-critical containers: - ```bash - docker run --cpus=0.5 ... - ``` -- Upgrade to an instance with more CPU cores. - -### 3. Disk Space Issues - -**Symptom:** "No space left on device" errors. - -**Solution:** -- Clean up unused Docker resources: - ```bash - docker system prune - ``` -- Increase disk space allocation for Docker (in Docker Desktop settings or cloud instance). -- Use volume mounts for large data directories. - -### 4. Network Resource Constraints - -**Symptom:** Network-related timeouts or slow connections. - -**Solution:** -- Check and increase network resource limits: - ```bash - docker network inspect bridge - ``` -- In cloud environments, ensure proper network configuration and bandwidth allocation. - -## R2R-Specific Resource Considerations - -### 1. Postgres with pgvector - -Vector operations can be CPU-intensive. Ensure your instance has sufficient CPU resources, or consider using a managed database service. - -### 2. Ollama for Local LLM - -Local LLM inference can be very resource-intensive. Ensure your instance has: -- At least 8GB of RAM (16GB+ recommended) -- Sufficient disk space for model storage -- A capable CPU or GPU for inference - -### 3. Hatchet Engine - -The Hatchet workflow engine may require significant resources depending on your workload. Monitor its resource usage and adjust as necessary. - -## Optimizing Resource Usage - -1. **Use Resource Limits:** Set appropriate CPU and memory limits for each container. -2. **Optimize Configurations:** Fine-tune application configs (e.g., Postgres work_mem). -3. **Scale Horizontally:** Consider splitting services across multiple smaller instances instead of one large instance. -4. **Use Managed Services:** For production, consider using managed services for databases and other resource-intensive components. -5. **Monitor and Alert:** Set up monitoring and alerting for resource usage to catch issues early. - -## When to Upgrade Resources - -Consider upgrading your instance or allocating more resources when: -1. You consistently see high resource utilization (>80% CPU, >90% memory). -2. Response times are consistently slow and not improving with optimization. -3. You're frequently hitting resource limits and it's affecting system stability. 
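-
-To judge the first criterion objectively, it helps to sample container utilization at intervals rather than watching a live view. A minimal sketch, assuming the Docker CLI is available on the host and using example container names from this guide (adjust the sample count, interval, and names to your deployment):
-
-```bash
-# Take 10 one-off samples, one per minute, and append them to a log for later review.
-for i in $(seq 1 10); do
-  docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemPerc}}" >> r2r_usage.log
-  sleep 60
-done
-
-# Containers that stay above ~80% CPU or ~90% memory across samples are upgrade candidates.
-grep -E "r2r|postgres|ollama|hatchet" r2r_usage.log
-```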
- -## Seeking Further Help - -If you've tried these solutions and still face resource issues: -1. Review the R2R documentation for specific resource recommendations. -2. Check the R2R GitHub issues for similar problems and solutions. -3. Reach out to the R2R community on Discord or GitHub for advice. -4. Consider engaging with R2R maintainers or professional services for complex deployments. - -Remember to always test in a non-production environment before making significant changes to resource allocations or instance types in a production setting. diff --git a/docs/documentation/deployment/troubleshooting/local_llm.mdx b/docs/documentation/deployment/troubleshooting/local_llm.mdx deleted file mode 100644 index f3fcabbc4..000000000 --- a/docs/documentation/deployment/troubleshooting/local_llm.mdx +++ /dev/null @@ -1,185 +0,0 @@ -# Troubleshooting Guide: Local LLM Integration Issues with R2R - -When integrating local Language Models (LLMs) with R2R, you may encounter various issues. This guide will help you diagnose and resolve common problems. - -## 1. Ollama Connection Issues - -### Symptom: R2R can't connect to Ollama - -1. **Check Ollama is running:** - ```bash - docker ps | grep ollama - ``` - Ensure the Ollama container is up and running. - -2. **Verify Ollama API accessibility:** - ```bash - curl http://localhost:11434/api/tags - ``` - This should return a list of available models. - -3. **Check R2R configuration:** - Ensure the `OLLAMA_API_BASE` environment variable is set correctly in your R2R configuration: - ```yaml - OLLAMA_API_BASE: http://ollama:11434 - ``` - -4. **Network connectivity:** - Ensure Ollama and R2R containers are on the same Docker network. - -### Solution: -- If Ollama isn't running, start it with `docker-compose up -d ollama` -- If API is inaccessible, check Ollama logs: `docker logs ollama` -- Correct the `OLLAMA_API_BASE` if necessary -- Ensure both services are on the `r2r-network` in your Docker Compose file - -## 2. Model Loading Issues - -### Symptom: Specified model isn't available or fails to load - -1. **List available models:** - ```bash - curl http://localhost:11434/api/tags - ``` - -2. **Attempt to pull the model:** - ```bash - docker exec -it ollama ollama pull - ``` - -3. **Check Ollama logs for pull errors:** - ```bash - docker logs ollama - ``` - -### Solution: -- If the model isn't listed, pull it using the Ollama CLI -- If pull fails, check internet connectivity and Ollama's GitHub for known issues -- Ensure sufficient disk space for model storage - -## 3. Performance Issues - -### Symptom: Local LLM responses are slow or timeouts occur - -1. **Check system resources:** - ```bash - docker stats - ``` - Look for high CPU or memory usage. - -2. **Verify GPU utilization** (if applicable): - ```bash - nvidia-smi - ``` - -3. **Review Ollama configuration:** - Check if Ollama is configured to use GPU acceleration. - -### Solution: -- Increase resources allocated to the Ollama container -- Enable GPU acceleration if available -- Consider using a smaller or more efficient model - -## 4. Inconsistent Responses - -### Symptom: LLM responses are inconsistent or unexpected - -1. **Verify model version:** - ```bash - docker exec -it ollama ollama list - ``` - Ensure you're using the intended model version. - -2. **Check prompt template:** - Review the prompt template in your R2R configuration for any issues. - -3. 
**Test model directly:** - ```bash - docker exec -it ollama ollama run "Your test prompt here" - ``` - Compare direct results with those from R2R. - -### Solution: -- Update to the latest model version if necessary -- Adjust prompt templates in R2R configuration -- Ensure consistent tokenization and preprocessing in R2R - -## 5. Integration Configuration Issues - -### Symptom: R2R doesn't use the local LLM as expected - -1. **Review R2R configuration:** - Check your `r2r.toml` or environment variables to ensure local LLM is properly configured. - -2. **Verify LLM provider settings:** - Ensure the correct provider (e.g., 'ollama') is set in your configuration. - -3. **Check R2R logs:** - Look for any errors or warnings related to LLM initialization. - -### Solution: -- Correct configuration settings in `r2r.toml` or environment variables -- Ensure the LLM provider is correctly specified -- Restart R2R after configuration changes - -## 6. Memory Management Issues - -### Symptom: Out of memory errors or crashes during LLM operations - -1. **Monitor memory usage:** - ```bash - docker stats ollama - ``` - -2. **Check Ollama logs for OOM errors:** - ```bash - docker logs ollama | grep "Out of memory" - ``` - -3. **Review model specifications:** - Ensure your hardware meets the minimum requirements for the chosen model. - -### Solution: -- Increase memory allocation for the Ollama container -- Use a smaller model if hardware is limited -- Implement request queuing in R2R to manage concurrent LLM calls - -## 7. API Compatibility Issues - -### Symptom: R2R fails to communicate properly with Ollama - -1. **Check Ollama version:** - ```bash - docker exec -it ollama ollama --version - ``` - -2. **Review R2R documentation:** - Ensure you're using a compatible version of Ollama for your R2R version. - -3. **Test basic API calls:** - ```bash - curl -X POST http://localhost:11434/api/generate -d '{"model": "", "prompt": "Hello, world!"}' - ``` - -### Solution: -- Update Ollama to a compatible version -- Adjust R2R code if using custom integrations -- Check for any middleware or proxy issues affecting API calls - -## Getting Further Help - -If you're still experiencing issues after trying these troubleshooting steps: - -1. Gather relevant logs from both R2R and Ollama -2. Note your system specifications and R2R configuration -3. Check the R2R GitHub issues for similar problems -4. Consider posting a detailed question on the R2R Discord community or GitHub discussions - -Remember to provide: -- R2R version (`r2r --version`) -- Ollama version -- Docker and Docker Compose versions -- Host system specifications -- Detailed error messages and logs - -By following this guide, you should be able to resolve most common issues with local LLM integration in R2R. If problems persist, don't hesitate to reach out to the R2R community for further assistance. diff --git a/docs/documentation/deployment/troubleshooting/missing_keys.mdx b/docs/documentation/deployment/troubleshooting/missing_keys.mdx deleted file mode 100644 index 9142c4415..000000000 --- a/docs/documentation/deployment/troubleshooting/missing_keys.mdx +++ /dev/null @@ -1,163 +0,0 @@ -# Troubleshooting Guide: Missing or Incorrect API Keys in R2R - -API keys are crucial for authenticating and accessing various services integrated with R2R. Missing or incorrect API keys can lead to connection failures and service disruptions. This guide will help you identify and resolve API key issues. - -## 1. 
Identifying API Key Issues - -Common symptoms of API key problems include: - -- Error messages mentioning "unauthorized," "authentication failed," or "invalid API key" -- Specific services or integrations not working while others function correctly -- Unexpected 401 or 403 HTTP status codes in logs - -## 2. Checking API Key Configuration - -### 2.1 Environment Variables - -R2R uses environment variables to store API keys. Check if the required environment variables are set: - -```bash -env | grep API_KEY -``` - -Look for variables like: -- `OPENAI_API_KEY` -- `ANTHROPIC_API_KEY` -- `AZURE_API_KEY` -- `UNSTRUCTURED_API_KEY` -- `HATCHET_CLIENT_TOKEN` - -### 2.2 Configuration Files - -If you're using configuration files (e.g., `r2r.toml`), verify that API keys are correctly set: - -```bash -grep -i "api_key" /path/to/your/r2r.toml -``` - -## 3. Common API Key Issues and Solutions - -### 3.1 OpenAI API Key - -**Issue:** OpenAI services not working or returning authentication errors. - -**Solution:** -1. Verify the `OPENAI_API_KEY` is set: - ```bash - echo $OPENAI_API_KEY - ``` -2. Ensure the key starts with "sk-". -3. Check the key's validity in the OpenAI dashboard. -4. Regenerate the key if necessary and update the environment variable. - -### 3.2 Anthropic API Key - -**Issue:** Claude or other Anthropic models not functioning. - -**Solution:** -1. Confirm the `ANTHROPIC_API_KEY` is set: - ```bash - echo $ANTHROPIC_API_KEY - ``` -2. Verify the key format (typically starts with "sk-ant-"). -3. Test the key using Anthropic's API documentation. - -### 3.3 Azure API Key - -**Issue:** Azure-based services failing to authenticate. - -**Solution:** -1. Check the `AZURE_API_KEY` is set: - ```bash - echo $AZURE_API_KEY - ``` -2. Verify additional Azure-related variables: - - `AZURE_API_BASE` - - `AZURE_API_VERSION` -3. Ensure the key and endpoint match your Azure resource configuration. - -### 3.4 Unstructured API Key - -**Issue:** File ingestion or parsing failures. - -**Solution:** -1. Verify the `UNSTRUCTURED_API_KEY` is set: - ```bash - echo $UNSTRUCTURED_API_KEY - ``` -2. Check if the Unstructured API URL is correctly configured: - ```bash - echo $UNSTRUCTURED_API_URL - ``` -3. Test the key using Unstructured's API documentation. - -### 3.5 Hatchet Client Token - -**Issue:** Workflow orchestration failures or Hatchet connectivity issues. - -**Solution:** -1. Confirm the `HATCHET_CLIENT_TOKEN` is set: - ```bash - echo $HATCHET_CLIENT_TOKEN - ``` -2. Verify the token was correctly generated during the R2R setup process. -3. Check Hatchet logs for any token-related errors. - -## 4. Updating API Keys - -If you need to update an API key: - -1. Stop the R2R service: - ```bash - docker-compose down - ``` - -2. Update the key in your environment or configuration file: - ```bash - export NEW_API_KEY="your-new-key-here" - ``` - Or update the `r2r.toml` file if you're using configuration files. - -3. Restart the R2R service: - ```bash - docker-compose up -d - ``` - -## 5. Security Best Practices - -- Never commit API keys to version control. -- Use environment variables or secure secret management solutions. -- Regularly rotate API keys, especially if you suspect they've been compromised. -- Use the principle of least privilege when creating API keys. - -## 6. Debugging API Key Issues - -If you're still having trouble: - -1. Check R2R logs for detailed error messages: - ```bash - docker-compose logs r2r - ``` - -2. Verify network connectivity to the API endpoints. - -3. 
Ensure your account has the necessary permissions for the API keys you're using. - -4. Try using the API key in a simple curl command to isolate R2R-specific issues: - ```bash - curl -H "Authorization: Bearer $YOUR_API_KEY" https://api.example.com/v1/test - ``` - -## 7. Getting Help - -If you've tried these steps and are still experiencing issues: - -1. Check the R2R documentation for any recent changes or known issues with API integrations. -2. Search the R2R GitHub issues for similar problems and solutions. -3. Reach out to the R2R community on Discord or other support channels, providing: - - R2R version - - Relevant logs (with sensitive information redacted) - - Steps to reproduce the issue - - Any error messages you're seeing - -Remember, never share your actual API keys when seeking help. Use placeholders or redacted versions in any logs or code snippets you share publicly. diff --git a/docs/documentation/deployment/troubleshooting/port_conflicts.mdx b/docs/documentation/deployment/troubleshooting/port_conflicts.mdx deleted file mode 100644 index 834965483..000000000 --- a/docs/documentation/deployment/troubleshooting/port_conflicts.mdx +++ /dev/null @@ -1,170 +0,0 @@ -# Troubleshooting Guide: Docker Port Conflicts - -Port conflicts are a common issue when deploying Docker containers, especially in complex setups like R2R. This guide will help you identify, diagnose, and resolve port conflicts in your Docker environment. - -## Understanding Port Conflicts - -A port conflict occurs when two processes attempt to use the same network port. In Docker, this typically happens when: - -1. A container tries to bind to a port already in use by the host system. -2. Multiple containers attempt to use the same port. -3. A container's port mapping conflicts with another container or host process. - -## Identifying Port Conflicts - -Signs of a port conflict include: - -- Error messages during container startup mentioning "port is already allocated" or "address already in use". -- Services failing to start or be accessible. -- Unexpected behavior in applications that rely on specific ports. - -## Steps to Diagnose and Resolve Port Conflicts - -### 1. Check for Used Ports - -First, identify which ports are already in use on your system: - -```bash -sudo lsof -i -P -n | grep LISTEN -``` - -or - -```bash -netstat -tuln -``` - -### 2. Review Docker Compose File - -Examine your `docker-compose.yml` file for port mappings: - -```yaml -services: - myservice: - ports: - - "8080:80" # Host port 8080 maps to container port 80 -``` - -### 3. Modify Port Mappings - -If you identify a conflict, you can: - -a. Change the host port in your Docker Compose file: - -```yaml -services: - myservice: - ports: - - "8081:80" # Changed from 8080 to 8081 -``` - -b. Use automatic port assignment: - -```yaml -services: - myservice: - ports: - - "80" # Docker will assign a random available host port -``` - -### 4. Stop Conflicting Services - -If a host service is using the required port: - -```bash -sudo service conflicting_service stop -``` - -### 5. Release Docker Resources - -Sometimes, stopping and removing all Docker containers and networks can help: - -```bash -docker-compose down -docker system prune -``` - -### 6. Check for Docker Network Conflicts - -Ensure your Docker networks don't have overlapping subnets: - -```bash -docker network ls -docker network inspect network_name -``` - -### 7. 
Use Network Host Mode (Caution) - -As a last resort, you can use host network mode, but this bypasses Docker's network isolation: - -```yaml -services: - myservice: - network_mode: "host" -``` - -### 8. Debugging with Docker Logs - -Check container logs for more detailed error messages: - -```bash -docker-compose logs service_name -``` - -## Specific R2R Port Conflict Scenarios - -### R2R API Server Conflict - -If the R2R API server (default port 7272) is conflicting: - -1. Check if any other service is using port 7272: - ```bash - sudo lsof -i :7272 - ``` - -2. Modify the R2R service in your docker-compose.yml: - ```yaml - services: - r2r: - ports: - - "7273:7272" # Changed host port to 7273 - ``` - -3. Update your environment variables: - ``` - PORT=7273 - ``` - -### Hatchet Engine Conflict - -If the Hatchet engine (default port 7077) is conflicting: - -1. Check for conflicts: - ```bash - sudo lsof -i :7077 - ``` - -2. Modify the Hatchet engine service: - ```yaml - services: - hatchet-engine: - ports: - - "7078:7077" - ``` - -3. Update the `SERVER_GRPC_BROADCAST_ADDRESS` environment variable for the Hatchet engine service. - -## Preventing Future Conflicts - -1. Use environment variables for port numbers in your Docker Compose file. -2. Document the ports used by each service in your project. -3. Consider using tools like Traefik or Nginx as reverse proxies to manage port allocation dynamically. - -By following this guide, you should be able to identify and resolve most port conflicts in your Docker and R2R setup. Remember, after making changes to your Docker Compose file or configuration, you'll need to rebuild and restart your services: - -```bash -docker-compose down -docker-compose up --build -``` - -If problems persist, check the R2R documentation or seek help from the community support channels. diff --git a/docs/documentation/deployment/troubleshooting/r2r_health.mdx b/docs/documentation/deployment/troubleshooting/r2r_health.mdx deleted file mode 100644 index 8b2a0026d..000000000 --- a/docs/documentation/deployment/troubleshooting/r2r_health.mdx +++ /dev/null @@ -1,155 +0,0 @@ -# Using the R2R Health Check Endpoint - -The health check endpoint in R2R provides a quick and easy way to verify the status of your R2R deployment. This guide will walk you through how to use this endpoint, interpret its results, and integrate it into your monitoring systems. - -## 1. Understanding the Health Check Endpoint - -The health check endpoint is a specific URL that, when accessed, returns information about the current state of the R2R system. It typically checks various components and dependencies to ensure everything is functioning correctly. - -## 2. Accessing the Health Check Endpoint - -The health check endpoint is usually available at `/v2/health`. Here are different ways to access it: - -### 2.1 Using cURL - -You can use cURL to make a GET request to the health check endpoint: - -```bash -curl http://localhost:7272/v2/health -``` - -### 2.2 Using a Web Browser - -If your R2R instance is accessible via a web browser, you can simply navigate to: - -``` -http://localhost:7272/v2/health -``` - -Replace `localhost:7272` with the appropriate host and port if your setup is different. - -### 2.3 Using the R2R CLI - -R2R provides a CLI command to check the health of the system: - -```bash -r2r health -``` - -### 2.4 Using the Python Client - -If you're using the R2R Python client: - -```python -from r2r import R2R - -client = R2R() -health_status = client.health() -print(health_status) -``` - -## 3. 
Interpreting Health Check Results - -The health check endpoint typically returns a JSON response. Here's an example of what it might look like: - -```json -{ - "status": "healthy", - "version": "3.1.22", - "components": { - "database": { - "status": "healthy", - "message": "Connected to database successfully" - }, - "vector_store": { - "status": "healthy", - "message": "Vector store is operational" - }, - "llm_service": { - "status": "healthy", - "message": "LLM service is responding" - } - }, - "timestamp": "2024-09-11T15:30:45Z" -} -``` - -Key elements to look for: -- Overall `status`: Should be "healthy" if everything is okay. -- `version`: Indicates the current version of R2R. -- `components`: Shows the status of individual components. -- `timestamp`: When the health check was performed. - -## 4. Common Issues and Troubleshooting - -If the health check returns a non-healthy status, here are some common issues and how to address them: - -### 4.1 Database Connection Issues - -If the database component shows as unhealthy: -- Check database credentials in your R2R configuration. -- Ensure the database server is running and accessible. -- Verify network connectivity between R2R and the database. - -### 4.2 Vector Store Problems - -For vector store issues: -- Check if the vector store service (e.g., Postgres with pgvector) is running. -- Verify the vector store configuration in R2R settings. - -### 4.3 LLM Service Not Responding - -If the LLM service is unhealthy: -- Check your API key for the LLM service (e.g., OpenAI API key). -- Ensure you have internet connectivity if using a cloud-based LLM. -- Verify the LLM service endpoint in your configuration. - -## 5. Integrating Health Checks into Monitoring Systems - -To ensure continuous monitoring of your R2R deployment: - -### 5.1 Prometheus Integration - -If you're using Prometheus for monitoring: - -1. Set up a Prometheus exporter that periodically calls the health check endpoint. -2. Configure Prometheus to scrape metrics from this exporter. -3. Set up alerts for when the health status is not "healthy". - -### 5.2 Kubernetes Liveness Probe - -If deploying R2R in Kubernetes, use the health check as a liveness probe: - -```yaml -livenessProbe: - httpGet: - path: /v2/health - port: 7272 - initialDelaySeconds: 30 - periodSeconds: 10 -``` - -### 5.3 AWS CloudWatch - -For AWS deployments: - -1. Create a CloudWatch synthetic canary that periodically calls the health check endpoint. -2. Set up CloudWatch alarms based on the canary's results. - -## 6. Best Practices - -1. Regular Checks: Perform health checks at regular intervals (e.g., every 5 minutes). -2. Alerting: Set up alerts for when the health check fails consistently. -3. Logging: Log health check results for historical analysis. -4. Trend Analysis: Monitor trends in response times of the health check endpoint. -5. Comprehensive Checks: Ensure your health check covers all critical components of your R2R deployment. - -## 7. Advanced Health Check Customization - -R2R allows for customization of the health check endpoint. You can add custom checks or modify existing ones by editing the health check configuration. Refer to the R2R documentation for detailed instructions on how to customize health checks for your specific deployment needs. - -## Conclusion - -The health check endpoint is a crucial tool for maintaining the reliability and performance of your R2R deployment. 
By regularly utilizing this endpoint and integrating it into your monitoring systems, you can ensure quick detection and resolution of any issues that may arise in your R2R system. - -For more detailed information on R2R's features and advanced configurations, refer to the [official R2R documentation](https://r2r-docs.sciphi.ai/) or join the [R2R Discord community](https://discord.gg/p6KqD2kjtB) for support and discussions. diff --git a/docs/documentation/deployment/troubleshooting/r2r_logs.mdx b/docs/documentation/deployment/troubleshooting/r2r_logs.mdx deleted file mode 100644 index 9ccdebe40..000000000 --- a/docs/documentation/deployment/troubleshooting/r2r_logs.mdx +++ /dev/null @@ -1,281 +0,0 @@ -# How to Check R2R Logs and Use Analytics & Observability Features - -This guide covers various methods to access and analyze R2R logs, as well as leverage R2R's powerful analytics and observability features. These capabilities allow you to monitor system performance, track usage patterns, and gain valuable insights into your RAG application's behavior. - -## 1. Checking R2R Logs - -### 1.1 Docker Deployment - -If you're running R2R using Docker: - -1. List running containers: - ```bash - docker ps - ``` - -2. View real-time logs: - ```bash - docker logs -f - ``` - -3. Using Docker Compose: - ```bash - docker-compose logs -f r2r - ``` - -### 1.2 Local Deployment - -For local deployments without Docker: - -1. Check the R2R configuration for log file locations. -2. Use standard Unix tools to view logs: - ```bash - tail -f /path/to/r2r.log - ``` - -### 1.3 Cloud Deployments - -- For Azure: Use "Log stream" or "Diagnose and solve problems" in the Azure portal. -- For AWS: Use CloudWatch or check EC2/ECS logs directly. - -## 2. Using R2R's Logging and Analytics Features - - -The features described in this section are typically restricted to superusers. Ensure you have the necessary permissions before attempting to access these features. - - -### 2.1 Fetching Logs - -You can fetch logs using the client-server architecture: - - - -```bash -r2r logs -``` - - - -```python -client.logs() -``` - - - -```javascript -client.logs() -``` - - - -```bash -curl -X POST http://localhost:7272/v2/logs \ - -H "Content-Type: application/json" \ - -d '{ - "log_type_filter": null, - "max_runs_requested": 100 - }' -``` - - - -Expected Output: -```python -[ - { - 'run_id': UUID('27f124ad-6f70-4641-89ab-f346dc9d1c2f'), - 'run_type': 'rag', - 'entries': [ - {'key': 'search_query', 'value': 'Who is Aristotle?'}, - {'key': 'search_latency', 'value': '0.39'}, - {'key': 'search_results', 'value': '["{\\"id\\":\\"7ed3a01c-88dc-5a58-a68b-6e5d9f292df2\\",...}"]'}, - {'key': 'rag_generation_latency', 'value': '3.79'}, - {'key': 'llm_response', 'value': 'Aristotle (Greek: Ἀριστοτέλης Aristotélēs; 384–322 BC) was...'} - ] - }, - # More log entries... 
-] -``` - -### 2.2 Using Analytics - -R2R offers an analytics feature for aggregating and analyzing log data: - - - -```bash -r2r analytics --filters '{"search_latencies": "search_latency"}' --analysis-types '{"search_latencies": ["basic_statistics", "search_latency"]}' -``` - - - -```python -client.analytics( - {"search_latencies": "search_latency"}, - {"search_latencies": ["basic_statistics", "search_latency"]} -) -``` - - - -```javascript -const filterCriteria = { - filters: { - search_latencies: "search_latency", - }, - }; - -const analysisTypes = { - search_latencies: ["basic_statistics", "search_latency"], -}; - -client.analytics(filterCriteria, analysisTypes); -``` - - - -```bash -curl -X POST http://localhost:7272/v2/analytics \ - -H "Content-Type: application/json" \ - -d '{ - "filter_criteria": { - "filters": { - "search_latencies": "search_latency" - } - }, - "analysis_types": - { - "analysis_types": { - "search_latencies": ["basic_statistics", "search_latency"] - } - } - }' -``` - - - -Expected Output: -```python -{ - 'results': { - 'filtered_logs': { - 'search_latencies': [ - { - 'timestamp': '2024-06-20 21:29:06', - 'log_id': UUID('0f28063c-8b87-4934-90dc-4cd84dda5f5c'), - 'key': 'search_latency', - 'value': '0.66', - 'rn': 3 - }, - ... - ] - }, - 'search_latencies': { - 'Mean': 0.734, - 'Median': 0.523, - 'Mode': 0.495, - 'Standard Deviation': 0.213, - 'Variance': 0.0453 - } - } -} -``` - -## 3. Advanced Analytics and Observability - -### 3.1 Custom Analytics - -You can specify different filters and analysis types to focus on specific aspects of your application's performance: - -```python -from r2r import FilterCriteria, AnalysisTypes - -# Analyze RAG latencies -rag_filter = FilterCriteria(filters={"rag_latencies": "rag_generation_latency", "rag_eval": "rag_eval_metric"}) -rag_analysis = AnalysisTypes(analysis_types={"rag_latencies": ["basic_statistics", "rag_generation_latency"]}) -rag_analytics = app.analytics(rag_filter, rag_analysis) - -# Track usage patterns by user -user_filter = FilterCriteria(filters={"user_patterns": "user_id"}) -user_analysis = AnalysisTypes(analysis_types={"user_patterns": ["bar_chart", "user_id"]}) -user_analytics = app.analytics(user_filter, user_analysis) - -# Monitor error rates -error_filter = FilterCriteria(filters={"error_rates": "error"}) -error_analysis = AnalysisTypes(analysis_types={"error_rates": ["basic_statistics", "error"]}) -error_analytics = app.analytics(error_filter, error_analysis) -``` - -### 3.2 Preloading Data for Analysis - -To get meaningful analytics, you can preload your database with random searches: - -```python -import random -from r2r import R2R, GenerationConfig - -app = R2R() - -queries = [ - "What is artificial intelligence?", - "Explain machine learning.", - "How does natural language processing work?", - "What are neural networks?", - "Describe deep learning.", - # Add more queries as needed -] - -for _ in range(1000): - query = random.choice(queries) - app.rag(query, GenerationConfig(model="openai/gpt-4o-mini")) - -print("Preloading complete. 
You can now run analytics on this data.") -``` - -### 3.3 User-Level Analytics - -To get analytics for a specific user: - -```python -user_id = "your_user_id_here" - -user_filter = FilterCriteria(filters={"user_analytics": "user_id"}) -user_analysis = AnalysisTypes(analysis_types={ - "user_analytics": ["basic_statistics", "user_id"], - "user_search_latencies": ["basic_statistics", "search_latency"] -}) - -user_analytics = app.analytics(user_filter, user_analysis) -print(f"Analytics for user {user_id}:") -print(user_analytics) -``` - -## 4. Log Analysis Tips - -1. Look for ERROR or WARNING level logs first. -2. Check timestamps to correlate logs with observed issues. -3. Use tools like `grep`, `awk`, or `sed` to filter logs. -4. For large log files, use `less` with search functionality. - -## 5. Log Aggregation Tools - -Consider using log aggregation tools for more advanced setups: - -1. ELK Stack (Elasticsearch, Logstash, Kibana) -2. Prometheus and Grafana -3. Datadog -4. Splunk - -## Summary - -R2R's logging, analytics, and observability features provide powerful tools for understanding and optimizing your RAG application. By leveraging these capabilities, you can: - -- Monitor system performance in real-time -- Analyze trends in search and RAG operations -- Identify potential bottlenecks or areas for improvement -- Track user behavior and usage patterns -- Make data-driven decisions to enhance your application's performance and user experience - -Remember to rotate logs regularly and set up log retention policies to manage disk space, especially in production environments. - -For more advanced usage and customization options, consider joining the [R2R Discord community](https://discord.gg/p6KqD2kjtB) or referring to the detailed [R2R documentation](https://r2r-docs.sciphi.ai/). diff --git a/docs/documentation/deployment/troubleshooting/rabbit_mq.mdx b/docs/documentation/deployment/troubleshooting/rabbit_mq.mdx deleted file mode 100644 index 4e0e533cf..000000000 --- a/docs/documentation/deployment/troubleshooting/rabbit_mq.mdx +++ /dev/null @@ -1,165 +0,0 @@ -# Troubleshooting Guide: RabbitMQ Connectivity Issues in R2R - -RabbitMQ is a critical component in the R2R architecture, used for message queuing and task orchestration. Connectivity issues can disrupt the entire system. This guide will help you diagnose and resolve common RabbitMQ connectivity problems. - -## 1. Verify RabbitMQ Service Status - -First, ensure that the RabbitMQ service is running: - -```bash -docker ps | grep rabbitmq -``` - -If you don't see the RabbitMQ container running, start it: - -```bash -docker-compose up -d hatchet-rabbitmq -``` - -## 2. Check RabbitMQ Logs - -View the RabbitMQ container logs: - -```bash -docker logs r2r-hatchet-rabbitmq-1 -``` - -Look for error messages related to connectivity, authentication, or resource issues. - -## 3. Verify RabbitMQ Connection Settings - -Ensure that the connection settings in your R2R configuration match the RabbitMQ service: - -1. Check the `SERVER_TASKQUEUE_RABBITMQ_URL` environment variable in the `hatchet-setup-config` service. -2. Verify that the URL format is correct: `amqp://user:password@hatchet-rabbitmq:5672/` - -## 4. Common Issues and Solutions - -### 4.1 Authentication Failures - -**Symptom:** Logs show authentication errors. - -**Solution:** -1. Verify the RabbitMQ credentials: - ```bash - docker exec r2r-hatchet-rabbitmq-1 rabbitmqctl list_users - ``` -2. 
If necessary, reset the password: - ```bash - docker exec r2r-hatchet-rabbitmq-1 rabbitmqctl change_password user newpassword - ``` -3. Update the `SERVER_TASKQUEUE_RABBITMQ_URL` in your R2R configuration with the new credentials. - -### 4.2 Network Connectivity - -**Symptom:** Services can't connect to RabbitMQ. - -**Solution:** -1. Ensure all services are on the same Docker network: - ```bash - docker network inspect r2r-network - ``` -2. Verify that the RabbitMQ service is accessible within the network: - ```bash - docker run --rm --network r2r-network alpine ping hatchet-rabbitmq - ``` - -### 4.3 Port Conflicts - -**Symptom:** RabbitMQ fails to start due to port conflicts. - -**Solution:** -1. Check if the ports are already in use: - ```bash - sudo lsof -i :5672 - sudo lsof -i :15672 - ``` -2. Modify the port mappings in your Docker Compose file if necessary. - -### 4.4 Resource Constraints - -**Symptom:** RabbitMQ becomes unresponsive or crashes frequently. - -**Solution:** -1. Check RabbitMQ resource usage: - ```bash - docker stats r2r-hatchet-rabbitmq-1 - ``` -2. Increase resources allocated to the RabbitMQ container in your Docker Compose file: - ```yaml - hatchet-rabbitmq: - # ... other configurations ... - deploy: - resources: - limits: - cpus: '1' - memory: 1G - ``` - -### 4.5 File Descriptor Limits - -**Symptom:** RabbitMQ logs show warnings about file descriptor limits. - -**Solution:** -1. Increase the file descriptor limit for the RabbitMQ container: - ```yaml - hatchet-rabbitmq: - # ... other configurations ... - ulimits: - nofile: - soft: 65536 - hard: 65536 - ``` - -## 5. Advanced Troubleshooting - -### 5.1 RabbitMQ Management Interface - -Access the RabbitMQ Management Interface for detailed diagnostics: - -1. Enable the management plugin if not already enabled: - ```bash - docker exec r2r-hatchet-rabbitmq-1 rabbitmq-plugins enable rabbitmq_management - ``` -2. Access the interface at `http://localhost:15672` (use the credentials defined in your Docker Compose file). - -### 5.2 Network Packet Capture - -If you suspect network issues, capture and analyze network traffic: - -```bash -docker run --net=container:r2r-hatchet-rabbitmq-1 --rm -v $(pwd):/cap nicolaka/netshoot tcpdump -i eth0 -w /cap/rabbitmq_traffic.pcap -``` - -Analyze the captured file with Wireshark for detailed network diagnostics. - -### 5.3 RabbitMQ Cluster Status - -If you're running a RabbitMQ cluster, check its status: - -```bash -docker exec r2r-hatchet-rabbitmq-1 rabbitmqctl cluster_status -``` - -## 6. Preventive Measures - -1. Implement health checks in your Docker Compose file: - ```yaml - hatchet-rabbitmq: - # ... other configurations ... - healthcheck: - test: ["CMD", "rabbitmqctl", "status"] - interval: 30s - timeout: 10s - retries: 5 - ``` - -2. Set up monitoring and alerting for RabbitMQ using tools like Prometheus and Grafana. - -3. Regularly backup RabbitMQ definitions and data: - ```bash - docker exec r2r-hatchet-rabbitmq-1 rabbitmqctl export_definitions /tmp/rabbitmq_defs.json - docker cp r2r-hatchet-rabbitmq-1:/tmp/rabbitmq_defs.json ./rabbitmq_defs.json - ``` - -By following this guide, you should be able to diagnose and resolve most RabbitMQ connectivity issues in your R2R deployment. If problems persist, consider seeking help from the RabbitMQ community or consulting the official RabbitMQ documentation for more advanced troubleshooting techniques. 
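-
-As a final quick check after working through the sections above, a short sequence like the following can confirm the broker is up and accepting connections. This is a sketch: the container name, credentials, and management port are assumed to match the examples used throughout this guide.
-
-```bash
-# Verify the RabbitMQ node responds at all
-docker exec r2r-hatchet-rabbitmq-1 rabbitmqctl ping
-
-# Confirm the AMQP listener is reachable and list current client connections
-docker exec r2r-hatchet-rabbitmq-1 rabbitmqctl list_connections name state
-
-# If the management plugin is enabled, the HTTP API gives the same picture
-curl -u user:password http://localhost:15672/api/overview
-```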
diff --git a/docs/documentation/deployment/troubleshooting/resources.mdx b/docs/documentation/deployment/troubleshooting/resources.mdx deleted file mode 100644 index 4d5d0a443..000000000 --- a/docs/documentation/deployment/troubleshooting/resources.mdx +++ /dev/null @@ -1,159 +0,0 @@ -# R2R Troubleshooting Guide: Insufficient System Resources - -When running R2R in Docker, you may encounter issues related to insufficient system resources. This guide will help you identify, diagnose, and resolve these problems. - -## Symptoms of Insufficient Resources - -1. Docker containers fail to start or crash unexpectedly -2. Slow performance or unresponsiveness of R2R services -3. Error messages related to memory, CPU, or disk space -4. Unexpected termination of processes within containers - -## Diagnosing Resource Issues - -### 1. Check Docker Resource Usage - -Use the following command to view resource usage for all containers: - -```bash -docker stats -``` - -Look for containers using high percentages of CPU or memory. - -### 2. Check Host System Resources - -On Linux: -```bash -top -free -h -df -h -``` - -On macOS: -```bash -top -vm_stat -df -h -``` - -On Windows: -``` -Task Manager > Performance tab -``` - -### 3. Review Docker Logs - -Check logs for specific containers: - -```bash -docker logs -``` - -Look for out-of-memory errors or other resource-related messages. - -## Common Issues and Solutions - -### 1. Insufficient Memory - -**Symptom:** Containers crash with out-of-memory errors. - -**Solutions:** -a. Increase Docker's memory limit: - - On Docker Desktop, go to Settings > Resources > Advanced and increase memory allocation. - -b. Optimize memory usage in R2R configuration: - - Modify Postgres memory settings in `postgresql.conf` - -c. Add or increase swap space on your host system. - -### 2. CPU Constraints - -**Symptom:** Slow performance or high CPU usage warnings. - -**Solutions:** -a. Increase Docker's CPU limit: - - On Docker Desktop, go to Settings > Resources > Advanced and increase CPU allocation. - -b. Optimize R2R and dependent services: - - Adjust thread pool sizes in configurations - - Consider using CPU-specific Docker options like `--cpus` or `--cpu-shares` - -### 3. Disk Space Issues - -**Symptom:** "No space left on device" errors or containers failing to write data. - -**Solutions:** -a. Clean up Docker resources: -```bash -docker system prune -``` - -b. Increase disk space allocation for Docker: - - On Docker Desktop, go to Settings > Resources > Advanced and increase disk image size. - -c. Monitor and manage log file sizes: - - Implement log rotation for services like Postgres - - Use Docker's logging options to limit log file sizes: - ```yaml - logging: - options: - max-size: "10m" - max-file: "3" - ``` - -### 4. Network Resource Constraints - -**Symptom:** Connection timeouts or network-related errors. - -**Solutions:** -a. Check and increase ulimit for open files: -```bash -ulimit -n 65535 -``` - -b. Optimize Docker network settings: - - Use `host` network mode for better performance (if security allows) - - Adjust MTU settings if necessary - -## Preventive Measures - -1. **Regular Monitoring:** Set up monitoring tools like Prometheus and Grafana to track resource usage over time. - -2. **Resource Limits:** Set appropriate resource limits in your Docker Compose file: - - ```yaml - services: - r2r: - deploy: - resources: - limits: - cpus: '0.50' - memory: 512M - reservations: - cpus: '0.25' - memory: 256M - ``` - -3. 
**Performance Testing:** Conduct regular performance tests to identify resource bottlenecks before they become critical. - -4. **Scaling Strategy:** Develop a strategy for horizontal scaling of R2R services as your usage grows. - -## When to Upgrade Hardware - -Consider upgrading your hardware or moving to a more powerful cloud instance if: - -1. You consistently hit resource limits despite optimization efforts. -2. Performance degrades as your data or user base grows. -3. You need to scale beyond what your current hardware can support. - -## Seeking Further Help - -If you've tried these solutions and still face resource issues: - -1. Gather detailed logs and resource usage data. -2. Check the R2R documentation for specific resource recommendations. -3. Consult the R2R community forums or support channels. -4. Consider reaching out to a DevOps or Docker specialist for advanced troubleshooting. - -Remember, optimal resource allocation often requires iterative testing and adjustment based on your specific use case and workload. diff --git a/docs/documentation/deployment/troubleshooting/services.mdx b/docs/documentation/deployment/troubleshooting/services.mdx deleted file mode 100644 index 829acb3fb..000000000 --- a/docs/documentation/deployment/troubleshooting/services.mdx +++ /dev/null @@ -1,145 +0,0 @@ -# R2R Service Configuration Troubleshooting Guide: Postgres, Hatchet - -This guide addresses common configuration problems for Postgres, and Hatchet services in R2R deployments. - -## Postgres Configuration Issues - -### 1. Connection Failures - -**Symptom:** R2R cannot connect to Postgres database. - -**Possible Causes and Solutions:** - -a) Incorrect connection string: - - Verify the `DATABASE_URL` environment variable. - - Ensure it follows the format: `postgres://user:password@host:port/dbname` - -b) Network issues: - - Check if Postgres is running and accessible from the R2R container. - - Verify network settings in Docker Compose file. - -c) Authentication problems: - - Confirm that the username and password in the connection string are correct. - - Check Postgres logs for failed authentication attempts. - -### 2. pgvector Extension Missing - -**Symptom:** Vector operations fail or R2R reports missing pgvector functionality. - -**Solution:** -- Ensure you're using the `pgvector/pgvector` image instead of the standard Postgres image. -- Verify that the pgvector extension is created in your database: - ```sql - CREATE EXTENSION IF NOT EXISTS vector; - ``` - -### 3. Insufficient Resources - -**Symptom:** Postgres performance issues or crashes under load. - -**Solution:** -- Adjust resource limits in Docker Compose: - ```yaml - postgres: - deploy: - resources: - limits: - cpus: '2' - memory: 4G - ``` -- Tune Postgres configuration parameters like `shared_buffers`, `effective_cache_size`, etc. - - -## Hatchet Configuration Issues - -### 1. RabbitMQ Connection Problems - -**Symptom:** Hatchet cannot connect to RabbitMQ. - -**Solution:** -- Verify RabbitMQ connection string: - ```yaml - environment: - - SERVER_TASKQUEUE_RABBITMQ_URL=amqp://user:password@hatchet-rabbitmq:5672/ - ``` -- Ensure RabbitMQ service is healthy and accessible. - -### 2. Hatchet API Key Issues - -**Symptom:** R2R cannot authenticate with Hatchet. 
- -**Solution:** -- Check the Hatchet API key generation process: - ```yaml - setup-token: - command: > - sh -c " - TOKEN=$(/hatchet/hatchet-admin token create --config /hatchet/config --tenant-id your-tenant-id) - echo $TOKEN > /hatchet_api_key/api_key.txt - " - ``` -- Verify that R2R is correctly reading the API key: - ```yaml - r2r: - environment: - - HATCHET_CLIENT_TOKEN=${HATCHET_CLIENT_TOKEN} - command: > - sh -c ' - export HATCHET_CLIENT_TOKEN=$(cat /hatchet_api_key/api_key.txt) - exec your_r2r_command - ' - ``` - -### 3. Hatchet Engine Health Check Failures - -**Symptom:** Hatchet Engine service fails health checks. - -**Solution:** -- Verify Hatchet Engine configuration: - ```yaml - hatchet-engine: - environment: - - SERVER_GRPC_BROADCAST_ADDRESS=host.docker.internal:7077 - - SERVER_GRPC_BIND_ADDRESS=0.0.0.0 - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:7077/health"] - interval: 10s - timeout: 5s - retries: 5 - ``` -- Check Hatchet Engine logs for startup errors. -- Ensure all required environment variables are set correctly. - -## General Troubleshooting Tips - -1. **Check Logs:** Always start by examining the logs of the problematic service: - ``` - docker-compose logs postgres - docker-compose logs hatchet-engine - ``` - -2. **Verify Network Connectivity:** Ensure services can communicate: - ``` - docker-compose exec r2r ping postgres - docker-compose exec r2r ping hatchet-engine - ``` - -3. **Check Resource Usage:** Monitor CPU and memory usage: - ``` - docker stats - ``` - -4. **Recreate Containers:** Sometimes, recreating containers can resolve issues: - ``` - docker-compose up -d --force-recreate - ``` - -5. **Verify Volumes:** Ensure data persistence by checking volume mounts: - ``` - docker volume ls - docker volume inspect - ``` - -6. **Environment Variables:** Double-check all environment variables in your `.env` file and `docker-compose.yml`. - -By following this guide, you should be able to diagnose and resolve most configuration issues related to Postgres, and Hatchet in your R2R deployment. If problems persist, consider seeking help from the R2R community or support channels. diff --git a/docs/documentation/deployment/troubleshooting/slow_queries.mdx b/docs/documentation/deployment/troubleshooting/slow_queries.mdx deleted file mode 100644 index 0a18860a9..000000000 --- a/docs/documentation/deployment/troubleshooting/slow_queries.mdx +++ /dev/null @@ -1,143 +0,0 @@ -# R2R Troubleshooting Guide: Slow Query Responses - -If you're experiencing slow query responses in your R2R deployment, this guide will help you identify and resolve common causes of performance issues. - -## 1. Identify the Bottleneck - -Before diving into specific solutions, it's crucial to identify where the slowdown is occurring: - -- Is it specific to certain types of queries? -- Is it affecting all queries or only queries to a particular data source? -- Is the slowdown consistent or intermittent? - -## 2. Check Database Performance - -### 2.1 Postgres - -1. **Monitor query execution time:** - ```sql - SELECT query, calls, total_time, mean_time - FROM pg_stat_statements - ORDER BY mean_time DESC - LIMIT 10; - ``` - -2. **Check for missing indexes:** - ```sql - SELECT relname, seq_scan, idx_scan - FROM pg_stat_user_tables - WHERE seq_scan > idx_scan - ORDER BY seq_scan DESC; - ``` - -3. **Analyze and vacuum the database:** - ```sql - ANALYZE; - VACUUM ANALYZE; - ``` - -## 3. Optimize Vector Search - -1. 
**Check vector index:** - Ensure your vector index is properly created and optimized. - -2. **Adjust search parameters:** - Experiment with different `ef_search` values to balance speed and accuracy. - -3. **Monitor vector dimension and dataset size:** - Large vector dimensions or dataset sizes can slow down searches. - -## 4. LLM Integration Issues - -1. **Check LLM response times:** - Monitor the time taken by the LLM to generate responses. - -2. **Verify API rate limits:** - Ensure you're not hitting rate limits for cloud-based LLMs. - -3. **For local LLMs (e.g., Ollama):** - - Check resource utilization (CPU, GPU, memory) - - Consider using a more efficient model or quantized version - -## 5. Network Latency - -1. **Check network latency between components:** - ```bash - ping - ``` - -2. **Use tools like `traceroute` to identify network bottlenecks:** - ```bash - traceroute - ``` - -3. **If using cloud services, ensure all components are in the same region.** - -## 6. Resource Constraints - -1. **Monitor system resources:** - ```bash - top - htop # If available - ``` - -2. **Check Docker resource allocation:** - ```bash - docker stats - ``` - -3. **Increase resources if necessary:** - - Adjust Docker resource limits - - Scale up cloud instances - - Add more nodes to your cluster - -## 7. Caching - -1. **Implement or optimize caching strategies:** - - Use Redis or Memcached for frequently accessed data - - Implement application-level caching - -2. **Check cache hit rates:** - Monitor your caching system's performance metrics. - -## 8. Query Optimization - -1. **Review and optimize complex queries:** - - Break down complex queries into simpler ones - - Use appropriate JOIN types in SQL queries - -2. **Use query parameterization:** - Avoid string concatenation in queries to leverage query plan caching. - -## 9. Hatchet Workflow Optimization - -1. **Review workflow designs:** - Ensure workflows are optimized and not causing unnecessary delays. - -2. **Monitor Hatchet logs:** - Check for any warnings or errors that might indicate performance issues. - -## 10. Logging and Monitoring - -1. **Implement comprehensive logging:** - Use logging to identify slow operations and bottlenecks. - -2. **Set up monitoring and alerting:** - Use tools like Prometheus and Grafana to monitor system performance. - -## 11. Data Volume and Scaling - -1. **Check data volume:** - Large amounts of data can slow down queries. Consider data archiving or partitioning. - -2. **Implement sharding:** - For very large datasets, consider implementing database sharding. - -3. **Scale horizontally:** - Add more nodes to your database cluster to distribute the load. - -## Conclusion - -Resolving slow query responses often requires a systematic approach to identify and address bottlenecks. Start with the most likely culprits based on your specific setup and gradually work through the list. Remember to test thoroughly after each change to ensure the issue is resolved without introducing new problems. - -If you continue to experience issues after trying these steps, consider reaching out to the R2R community or support channels for more specialized assistance. 
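For the search-parameter tuning mentioned in section 3, the following is a minimal sketch of how you might experiment from inside the Postgres container. It assumes the Compose service is named `postgres`, that embeddings live in a table such as `your_vector_table`, and that you are using an HNSW index (`hnsw.ef_search`) or an IVFFlat index (`ivfflat.probes`); adjust the names and the query vector to match your deployment.

```bash
# Sketch: try different pgvector search parameters and inspect the resulting query plan.
# The service name, table name, and query vector below are placeholders.
docker-compose exec -T postgres psql -U "$R2R_POSTGRES_USER" -d "$R2R_POSTGRES_DBNAME" <<'SQL'
SET hnsw.ef_search = 100;     -- HNSW: higher values trade speed for recall
-- SET ivfflat.probes = 10;   -- IVFFlat: the equivalent speed/accuracy knob
EXPLAIN ANALYZE
SELECT id
FROM your_vector_table
ORDER BY embedding <=> '[0.1, 0.2, 0.3]'  -- replace with a real query vector of the correct dimension
LIMIT 10;
SQL
```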
diff --git a/docs/documentation/deployment/troubleshooting/timeouts.mdx b/docs/documentation/deployment/troubleshooting/timeouts.mdx deleted file mode 100644 index 8f2d71628..000000000 --- a/docs/documentation/deployment/troubleshooting/timeouts.mdx +++ /dev/null @@ -1,187 +0,0 @@ -# R2R Troubleshooting Guide: Connection Timeouts - -Connection timeouts can occur at various points in an R2R deployment, affecting different components and services. This guide will help you identify, diagnose, and resolve connection timeout issues. - -## 1. Identifying Connection Timeouts - -Connection timeouts typically manifest as: -- Slow or unresponsive API calls -- Error messages mentioning "timeout" or "connection refused" -- Services failing to start or becoming unhealthy -- Incomplete data processing or retrieval - -## 2. Common Causes of Connection Timeouts - -1. Network issues -2. Misconfigured firewall rules -3. Overloaded services -4. Incorrect connection settings -5. DNS resolution problems -6. Service dependencies not ready - -## 3. Diagnosing Connection Timeouts - -### 3.1 Check Service Health - -```bash -docker-compose ps -``` - -Look for services in an unhealthy or exit state. - -### 3.2 Inspect Service Logs - -```bash -docker-compose logs -``` - -Search for timeout-related error messages. - -### 3.3 Network Connectivity Test - -Test network connectivity between services: - -```bash -docker-compose exec ping -``` - -### 3.4 Check Resource Usage - -Monitor CPU, memory, and disk usage: - -```bash -docker stats -``` - -## 4. Resolving Common Connection Timeout Issues - -### 4.1 R2R API Timeouts - -If the R2R API is timing out: - -1. Check the R2R service logs: - ```bash - docker-compose logs r2r - ``` - -2. Verify the R2R service is healthy: - ```bash - docker-compose exec r2r curl -f http://localhost:7272/v2/health - ``` - -3. Increase the timeout settings in your R2R configuration file. - -### 4.2 Database Connection Timeouts - -For Postgres connection issues: - -1. Verify database service is running: - ```bash - docker-compose ps postgres - ``` - -2. Check database logs: - ```bash - docker-compose logs postgres - ``` - -3. Ensure correct connection strings in R2R configuration. - -4. Increase connection pool size or timeout settings. - -### 4.3 Hatchet Engine Timeouts - -If Hatchet Engine is experiencing timeouts: - -1. Check Hatchet Engine logs: - ```bash - docker-compose logs hatchet-engine - ``` - -2. Verify RabbitMQ is running and accessible: - ```bash - docker-compose exec hatchet-rabbitmq rabbitmqctl status - ``` - -3. Increase Hatchet client timeout settings in R2R configuration. - -### 4.4 Ollama LLM Timeouts - -For timeouts related to Ollama: - -1. Check Ollama service status: - ```bash - docker-compose ps ollama - ``` - -2. Verify Ollama logs: - ```bash - docker-compose logs ollama - ``` - -3. Ensure Ollama models are properly loaded: - ```bash - docker-compose exec ollama ollama list - ``` - -4. Increase LLM timeout settings in R2R configuration. - -## 5. 
Advanced Troubleshooting - -### 5.1 Network Diagnostics - -Use network diagnostic tools within containers: - -```bash -docker-compose exec r2r sh -c "apt-get update && apt-get install -y iputils-ping net-tools" -docker-compose exec r2r netstat -tuln -``` - -### 5.2 DNS Resolution - -Check DNS resolution within containers: - -```bash -docker-compose exec r2r nslookup postgres -``` - -### 5.3 Firewall Rules - -Verify firewall settings on the host machine: - -```bash -sudo iptables -L -``` - -Ensure necessary ports are open for inter-container communication. - -### 5.4 Docker Network Inspection - -Inspect the Docker network: - -```bash -docker network inspect r2r-network -``` - -Verify all services are properly connected to the network. - -## 6. Preventive Measures - -1. Implement robust health checks for all services. -2. Use appropriate timeout and retry mechanisms in your application code. -3. Monitor system resources and scale services as needed. -4. Regularly update and maintain all components of your R2R deployment. -5. Implement logging and monitoring solutions for early detection of issues. - -## 7. Seeking Further Assistance - -If connection timeout issues persist: - -1. Collect comprehensive logs from all services. -2. Document the steps you've taken to troubleshoot. -3. Check the R2R documentation and community forums for similar issues. -4. Consider reaching out to the R2R support channels or community for specialized assistance. - -Remember to provide detailed information about your deployment environment, configuration settings, and the specific timeout scenarios you're encountering when seeking help. - -By following this guide, you should be able to diagnose and resolve most connection timeout issues in your R2R deployment. If problems persist, don't hesitate to seek additional support from the R2R community or professional services. diff --git a/docs/documentation/deployment/troubleshooting/toml_errors.mdx b/docs/documentation/deployment/troubleshooting/toml_errors.mdx deleted file mode 100644 index 4e29ab237..000000000 --- a/docs/documentation/deployment/troubleshooting/toml_errors.mdx +++ /dev/null @@ -1,178 +0,0 @@ -# Troubleshooting Guide: TOML File Syntax Errors in R2R Configuration - -TOML (Tom's Obvious, Minimal Language) is used for R2R configuration files. Syntax errors in these files can prevent R2R from starting or functioning correctly. This guide will help you identify and resolve common TOML syntax issues. - -## 1. Common TOML Syntax Errors - -### 1.1 Missing or Mismatched Quotes - -**Symptom:** Error message mentioning unexpected character or unterminated string. - -**Example of incorrect syntax:** -```toml -name = "John's Config -``` - -**Correct syntax:** -```toml -name = "John's Config" -``` - -**Solution:** Ensure all string values are properly enclosed in quotes. Use double quotes for strings containing single quotes. - -### 1.2 Incorrect Array Syntax - -**Symptom:** Error about invalid array literals or unexpected tokens. - -**Example of incorrect syntax:** -```toml -fruits = apple, banana, cherry -``` - -**Correct syntax:** -```toml -fruits = ["apple", "banana", "cherry"] -``` - -**Solution:** Use square brackets for arrays and separate elements with commas. - -### 1.3 Indentation Errors - -**Symptom:** Unexpected key error or section not found. 
- -**Example of incorrect syntax:** -```toml -[database] - host = "localhost" - port = 5432 -``` - -**Correct syntax:** -```toml -[database] -host = "localhost" -port = 5432 -``` - -**Solution:** TOML doesn't require specific indentation, but be consistent. Align key-value pairs at the same level. - -### 1.4 Incorrect Table (Section) Definition - -**Symptom:** Invalid table name or unexpected token after table. - -**Example of incorrect syntax:** -```toml -[database settings] -``` - -**Correct syntax:** -```toml -[database.settings] -``` - -**Solution:** Use dot notation for nested tables instead of spaces. - -### 1.5 Duplicate Keys - -**Symptom:** Duplicate keys error or unexpected overwrite of values. - -**Example of incorrect syntax:** -```toml -[server] -port = 8080 -port = 9090 -``` - -**Correct syntax:** -```toml -[server] -port = 8080 -``` - -**Solution:** Ensure each key is unique within its table. - -## 2. R2R-Specific TOML Issues - -### 2.1 Incorrect LLM Provider Configuration - -**Symptom:** R2R fails to start or connect to LLM provider. - -**Example of incorrect syntax:** -```toml -[llm_provider] -type = "openai" -api_key = ${OPENAI_API_KEY} -``` - -**Correct syntax:** -```toml -[llm_provider] -type = "openai" -api_key = "${OPENAI_API_KEY}" -``` - -**Solution:** Ensure environment variables are properly quoted in the TOML file. - -### 2.2 Misconfigurated Database Settings - -**Symptom:** R2R cannot connect to the database. - -**Example of incorrect syntax:** -```toml -[database] -url = postgres://user:password@localhost:5432/dbname -``` - -**Correct syntax:** -```toml -[database] -url = "postgres://user:password@localhost:5432/dbname" -``` - -**Solution:** Enclose the entire database URL in quotes. - -## 3. Debugging Steps - -1. **Use a TOML Validator:** - - Online tools like [TOML Lint](https://www.toml-lint.com/) can quickly identify syntax errors. - - For local validation, use the `toml` Python package: - ``` - pip install toml - python -c "import toml; toml.load('your_config.toml')" - ``` - -2. **Check R2R Logs:** - - Look for specific error messages related to configuration loading. - - Pay attention to line numbers mentioned in error messages. - -3. **Incrementally Build Configuration:** - - Start with a minimal valid configuration and add sections gradually. - - Test R2R after each addition to isolate the problematic section. - -4. **Use Environment Variables Cautiously:** - - Ensure all environment variables used in the TOML file are properly set. - - Double-check the syntax for referencing environment variables. - -5. **Compare with Example Configurations:** - - Reference the R2R documentation for correct TOML structure examples. - - Ensure your configuration matches the expected format for each section. - -## 4. Best Practices - -1. Use a consistent naming convention for keys (e.g., snake_case). -2. Group related settings under appropriate table headers. -3. Comment your configuration file for clarity. -4. Keep sensitive information (like API keys) in environment variables. -5. Regularly back up your configuration file before making changes. - -## 5. Seeking Help - -If you're still experiencing issues after following this guide: - -1. Check the [R2R documentation](https://r2r-docs.sciphi.ai/) for the most up-to-date configuration guidelines. -2. Search the [R2R GitHub issues](https://github.com/SciPhi-AI/R2R/issues) for similar problems and solutions. -3. 
If your issue is unique, consider opening a new GitHub issue with your sanitized configuration file and the full error message. - -Remember to remove any sensitive information (like API keys or passwords) before sharing your configuration publicly. - -By following this guide, you should be able to resolve most TOML syntax errors in your R2R configuration. If problems persist, don't hesitate to seek help from the R2R community or support channels. diff --git a/docs/documentation/deployment/troubleshooting/unstructured.mdx b/docs/documentation/deployment/troubleshooting/unstructured.mdx deleted file mode 100644 index 79cf8d7a9..000000000 --- a/docs/documentation/deployment/troubleshooting/unstructured.mdx +++ /dev/null @@ -1,162 +0,0 @@ -# Troubleshooting Guide: Unstructured.io Setup Difficulties with R2R - -Unstructured.io is a crucial component in R2R for handling file ingestion. This guide addresses common issues and their solutions when setting up and using Unstructured.io within the R2R ecosystem. - -## 1. Installation Issues - -### 1.1 Missing Dependencies - -**Problem:** Unstructured.io fails to install due to missing system dependencies. - -**Solution:** -1. Ensure you have the required system libraries: - ```bash - sudo apt-get update - sudo apt-get install -y python3-dev libxml2-dev libxslt1-dev antiword unrtf poppler-utils pstotext tesseract-ocr flac ffmpeg lame libmad0 libsox-fmt-mp3 sox libjpeg-dev swig - ``` -2. If using pip, install with extras: - ```bash - pip install "unstructured[all-deps]" - ``` - -### 1.2 Version Compatibility - -**Problem:** Incompatibility between Unstructured.io and R2R versions. - -**Solution:** -1. Check the R2R documentation for the recommended Unstructured.io version. -2. Install the specific version: - ```bash - pip install unstructured==X.Y.Z - ``` - -## 2. Configuration Issues - -### 2.1 API Key Not Recognized - -**Problem:** R2R fails to connect to Unstructured.io due to API key issues. - -**Solution:** -1. Verify your API key is correctly set in the R2R configuration: - ```toml - [unstructured] - api_key = "your-api-key-here" - ``` -2. Ensure the environment variable is set: - ```bash - export UNSTRUCTURED_API_KEY=your-api-key-here - ``` - -### 2.2 Incorrect API Endpoint - -**Problem:** R2R can't reach the Unstructured.io API. - -**Solution:** -1. Check the API endpoint in your R2R configuration: - ```toml - [unstructured] - api_url = "https://api.unstructured.io/general/v0/general" - ``` -2. If using a self-hosted version, ensure the URL is correct. - -## 3. Runtime Errors - -### 3.1 File Processing Failures - -**Problem:** Unstructured.io fails to process certain file types. - -**Solution:** -1. Verify the file type is supported by Unstructured.io. -2. Check file permissions and ensure R2R has access to the files. -3. For specific file types, install additional dependencies: - ```bash - pip install "unstructured[pdf]" # For enhanced PDF support - ``` - -### 3.2 Memory Issues - -**Problem:** Unstructured.io crashes due to insufficient memory when processing large files. - -**Solution:** -1. Increase the available memory for the R2R process. -2. If using Docker, adjust the container's memory limit: - ```yaml - services: - r2r: - deploy: - resources: - limits: - memory: 4G - ``` - -### 3.3 Slow Processing - -**Problem:** File processing is exceptionally slow. - -**Solution:** -1. Check system resources (CPU, RAM) and ensure they meet minimum requirements. -2. Consider using Unstructured.io's async API for large batch processing. -3. 
Implement a caching mechanism in R2R to store processed results. - -## 4. Integration Issues - -### 4.1 Data Format Mismatch - -**Problem:** R2R fails to interpret the output from Unstructured.io correctly. - -**Solution:** -1. Verify that R2R's parsing logic matches Unstructured.io's output format. -2. Check for any recent changes in Unstructured.io's API responses and update R2R accordingly. - -### 4.2 Rate Limiting - -**Problem:** Hitting API rate limits when using Unstructured.io's cloud service. - -**Solution:** -1. Implement rate limiting in your R2R application. -2. Consider upgrading your Unstructured.io plan for higher limits. -3. Use local deployment of Unstructured.io for unlimited processing. - -## 5. Local Deployment Issues - -### 5.1 Docker Container Failures - -**Problem:** Unstructured.io Docker container fails to start or crashes. - -**Solution:** -1. Check Docker logs: - ```bash - docker logs [container_name] - ``` -2. Ensure all required environment variables are set. -3. Verify that the Docker image version is compatible with your R2R version. - -### 5.2 Network Connectivity - -**Problem:** R2R can't connect to locally deployed Unstructured.io. - -**Solution:** -1. Ensure the Unstructured.io container is on the same Docker network as R2R. -2. Check firewall settings and ensure necessary ports are open. -3. Verify the URL in R2R configuration points to the correct local address. - -## 6. Debugging Tips - -1. Enable verbose logging in both R2R and Unstructured.io. -2. Use tools like `curl` to test API endpoints directly. -3. Implement proper error handling in R2R to capture and log Unstructured.io-related issues. - -## 7. Seeking Help - -If issues persist: -1. Check the [Unstructured.io documentation](https://unstructured-io.github.io/unstructured/). -2. Visit the [R2R GitHub repository](https://github.com/SciPhi-AI/R2R) for specific integration issues. -3. Reach out to the R2R community on Discord or other support channels. - -Remember to provide detailed information when seeking help, including: -- R2R and Unstructured.io versions -- Deployment method (cloud, local, Docker) -- Specific error messages and logs -- Steps to reproduce the issue - -By following this guide, you should be able to troubleshoot and resolve most Unstructured.io setup and integration issues within your R2R deployment. diff --git a/docs/documentation/deployment/troubleshooting/vector_store_issues.mdx b/docs/documentation/deployment/troubleshooting/vector_store_issues.mdx deleted file mode 100644 index 4f8e6321b..000000000 --- a/docs/documentation/deployment/troubleshooting/vector_store_issues.mdx +++ /dev/null @@ -1,164 +0,0 @@ -# Troubleshooting Guide: Vector Storage Problems in R2R - -Vector storage is a crucial component in R2R (RAG to Riches) for efficient similarity searches. This guide focuses on troubleshooting common vector storage issues, particularly with Postgres and pgvector. - -## 1. Connection Issues - -### Symptom: R2R can't connect to the vector database - -1. **Check Postgres Connection:** - ```bash - psql -h localhost -U your_username -d your_database - ``` - If this fails, the issue might be with Postgres itself, not specifically vector storage. - -2. **Verify Environment Variables:** - Ensure these are correctly set in your R2R configuration: - - `R2R_POSTGRES_USER` - - `R2R_POSTGRES_PASSWORD` - - `R2R_POSTGRES_HOST` - - `R2R_POSTGRES_PORT` - - `R2R_POSTGRES_DBNAME` - - `R2R_PROJECT_NAME` - -3. 
**Check Docker Network:** - If using Docker, ensure the R2R and Postgres containers are on the same network: - ```bash - docker network inspect r2r-network - ``` - -## 2. pgvector Extension Issues - -### Symptom: "extension pgvector does not exist" error - -1. **Check if pgvector is Installed:** - Connect to your database and run: - ```sql - SELECT * FROM pg_extension WHERE extname = 'vector'; - ``` - -2. **Install pgvector:** - If not installed, run: - ```sql - CREATE EXTENSION vector; - ``` - -3. **Verify Postgres Version:** - pgvector requires Postgres 11 or later. Check your version: - ```sql - SELECT version(); - ``` - -## 3. Vector Dimension Mismatch - -### Symptom: Error inserting vectors or during similarity search - -1. **Check Vector Dimensions:** - Verify the dimension of vectors you're trying to insert matches your schema: - ```sql - SELECT * FROM information_schema.columns - WHERE table_name = 'your_vector_table' AND data_type = 'vector'; - ``` - -2. **Verify R2R Configuration:** - Ensure the vector dimension in your R2R configuration matches your database schema. - -3. **Recreate Table with Correct Dimensions:** - If dimensions are mismatched, you may need to recreate the table: - ```sql - DROP TABLE your_vector_table; - CREATE TABLE your_vector_table (id bigserial PRIMARY KEY, embedding vector(384)); - ``` - -## 4. Performance Issues - -### Symptom: Slow similarity searches - -1. **Check Index:** - Ensure you have an appropriate index: - ```sql - CREATE INDEX ON your_vector_table USING ivfflat (embedding vector_cosine_ops); - ``` - -2. **Analyze Table:** - Run ANALYZE to update statistics: - ```sql - ANALYZE your_vector_table; - ``` - -3. **Monitor Query Performance:** - Use `EXPLAIN ANALYZE` to check query execution plans: - ```sql - EXPLAIN ANALYZE SELECT * FROM your_vector_table - ORDER BY embedding <=> '[your_vector]' LIMIT 10; - ``` - -4. **Adjust Work Memory:** - If dealing with large vectors, increase work_mem: - ```sql - SET work_mem = '1GB'; - ``` - -## 5. Data Integrity Issues - -### Symptom: Unexpected search results or missing data - -1. **Check Vector Normalization:** - Ensure vectors are normalized before insertion if using cosine similarity. - -2. **Verify Data Insertion:** - Check if data is being correctly inserted: - ```sql - SELECT COUNT(*) FROM your_vector_table; - ``` - -3. **Inspect Random Samples:** - Look at some random entries to ensure data quality: - ```sql - SELECT * FROM your_vector_table ORDER BY RANDOM() LIMIT 10; - ``` - -## 6. Disk Space Issues - -### Symptom: Insertion failures or database unresponsiveness - -1. **Check Disk Space:** - ```bash - df -h - ``` - -2. **Monitor Postgres Disk Usage:** - ```sql - SELECT pg_size_pretty(pg_database_size('your_database')); - ``` - -3. **Identify Large Tables:** - ```sql - SELECT relname, pg_size_pretty(pg_total_relation_size(relid)) - FROM pg_catalog.pg_statio_user_tables - ORDER BY pg_total_relation_size(relid) DESC; - ``` - -## 7. Backup and Recovery - -If all else fails, you may need to restore from a backup: - -1. **Create a Backup:** - ```bash - pg_dump -h localhost -U your_username -d your_database > backup.sql - ``` - -2. **Restore from Backup:** - ```bash - psql -h localhost -U your_username -d your_database < backup.sql - ``` - -## Getting Further Help - -If these steps don't resolve your issue: - -1. Check R2R logs for more detailed error messages. -2. Consult the [pgvector documentation](https://github.com/pgvector/pgvector) for advanced troubleshooting. -3. 
Reach out to the R2R community or support channels with detailed information about your setup and the steps you've tried. - -Remember to always backup your data before making significant changes to your database or vector storage configuration. diff --git a/docs/documentation/deployment/troubleshooting/workflows.mdx b/docs/documentation/deployment/troubleshooting/workflows.mdx deleted file mode 100644 index ad9724627..000000000 --- a/docs/documentation/deployment/troubleshooting/workflows.mdx +++ /dev/null @@ -1,142 +0,0 @@ -# Troubleshooting Guide: Workflow Orchestration Failures in R2R - -Workflow orchestration is a critical component of R2R, managed by Hatchet. When orchestration failures occur, they can disrupt the entire data processing pipeline. This guide will help you identify and resolve common issues. - -## 1. Check Hatchet Service Status - -First, ensure that the Hatchet service is running properly: - -```bash -docker ps | grep hatchet -``` - -Look for containers with names like `hatchet-engine`, `hatchet-api`, and `hatchet-rabbitmq`. - -## 2. Examine Hatchet Logs - -View the logs for the Hatchet engine: - -```bash -docker logs r2r-hatchet-engine-1 -``` - -Look for error messages or warnings that might indicate the source of the problem. - -## 3. Common Issues and Solutions - -### 3.1 Connection Issues with RabbitMQ - -**Symptom:** Hatchet logs show connection errors to RabbitMQ. - -**Solution:** -1. Check if RabbitMQ is running: - ```bash - docker ps | grep rabbitmq - ``` -2. Verify RabbitMQ credentials in the Hatchet configuration. -3. Ensure the RabbitMQ container is in the same Docker network as Hatchet. - -### 3.2 Database Connection Problems - -**Symptom:** Errors in Hatchet logs related to database connections. - -**Solution:** -1. Verify Postgres container is running and healthy: - ```bash - docker ps | grep postgres - ``` -2. Check database connection settings in Hatchet configuration. -3. Ensure the Postgres container is in the same Docker network as Hatchet. - -### 3.3 Workflow Definition Errors - -**Symptom:** Specific workflows fail to start or execute properly. - -**Solution:** -1. Review the workflow definition in your R2R configuration. -2. Check for syntax errors or invalid step definitions. -3. Verify that all required environment variables for the workflow are set. - -### 3.4 Resource Constraints - -**Symptom:** Workflows start but fail due to timeout or resource exhaustion. - -**Solution:** -1. Check system resources (CPU, memory) on the host machine. -2. Adjust resource limits for Docker containers if necessary. -3. Consider scaling up your infrastructure or optimizing workflow resource usage. - -### 3.5 Version Incompatibility - -**Symptom:** Unexpected errors after updating R2R or Hatchet. - -**Solution:** -1. Ensure all components (R2R, Hatchet, RabbitMQ) are compatible versions. -2. Check the R2R documentation for any breaking changes in recent versions. -3. Consider rolling back to a known working version if issues persist. - -## 4. Advanced Debugging - -### 4.1 Inspect Hatchet API - -Use the Hatchet API to get more details about workflow executions: - -1. Find the Hatchet API container: - ```bash - docker ps | grep hatchet-api - ``` -2. Use `curl` to query the API (replace `` with the actual ID): - ```bash - docker exec curl http://localhost:7077/api/v1/workflows - ``` - -### 4.2 Check Hatchet Dashboard - -If you have the Hatchet dashboard set up: - -1. Access the dashboard (typically at `http://localhost:8002` if using default ports). -2. 
Navigate to the Workflows section to view detailed execution status and logs. - -### 4.3 Analyze RabbitMQ Queues - -Inspect RabbitMQ queues to check for message backlogs or routing issues: - -1. Access the RabbitMQ management interface (typically at `http://localhost:15672`). -2. Check queue lengths, message rates, and any dead-letter queues. - -## 5. Common Workflow-Specific Issues - -### 5.1 Document Ingestion Failures - -**Symptom:** Documents fail to process through the ingestion workflow. - -**Solution:** -1. Check the Unstructured API configuration and connectivity. -2. Verify file permissions and formats of ingested documents. -3. Examine R2R logs for specific ingestion errors. - -### 5.2 Vector Store Update Issues - -**Symptom:** Vector store (Postgres with pgvector) not updating correctly. - -**Solution:** -1. Check Postgres logs for any errors related to vector operations. -2. Verify the pgvector extension is properly installed and enabled. -3. Ensure the R2R configuration correctly specifies the vector store settings. - -## 6. Seeking Further Help - -If you're still experiencing issues after trying these solutions: - -1. Gather all relevant logs (R2R, Hatchet, RabbitMQ, Postgres). -2. Document the steps to reproduce the issue. -3. Check the R2R GitHub repository for similar reported issues. -4. Consider opening a new issue on the R2R GitHub repository with your findings. - -Remember to provide: -- R2R version (`r2r version`) -- Docker Compose configuration -- Relevant parts of your R2R configuration -- Detailed error messages and logs - -By following this guide, you should be able to diagnose and resolve most workflow orchestration issues in R2R. If problems persist, don't hesitate to seek help from the R2R community or support channels. diff --git a/docs/documentation/glossary.mdx b/docs/documentation/glossary.mdx deleted file mode 100644 index 1ea283be8..000000000 --- a/docs/documentation/glossary.mdx +++ /dev/null @@ -1,40 +0,0 @@ ---- -title: 'Glossary' -description: 'Key terms and environment settings for R2R' -icon: 'book' ---- - -## Key Terms - -- **RAG (Retrieval-Augmented Generation)**: A technique that combines information retrieval and language model generation to produce more accurate and informative responses. -- **Ingestion**: The process of parsing and indexing documents into the R2R system for later retrieval and generation. -- **Fragment**: A chunk of text extracted from an ingested document, used for similarity search and context in RAG. -- **Hybrid Search**: A search method that combines semantic vector search with traditional keyword search for improved relevancy. -- **Knowledge Graph**: A structured representation of entities and their relationships, used for advanced querying and reasoning. -- **RAG Agent**: An interactive and intelligent query interface that can formulate its own questions, search for information, and provide informed responses based on retrieved context. -- **Collection**: A grouping of documents and users for efficient access control and organization. - -## Environment Settings - -- `OPENAI_API_KEY`: API key for OpenAI's language models and embeddings. -- `ANTHROPIC_API_KEY`: API key for Anthropic's language models and embeddings. -- `R2R_POSTGRES_USER`: Username for the Postgres database. -- `R2R_POSTGRES_PASSWORD`: Password for the Postgres database. -- `R2R_POSTGRES_HOST`: Hostname or IP address of the Postgres database server. -- `R2R_POSTGRES_PORT`: Port number for the Postgres database server. 
-- `R2R_POSTGRES_DBNAME`: Name of the Postgres database to use for R2R. -- `R2R_PROJECT_NAME`: Defines the tables within the Postgres database where the selected R2R project resides. -- `R2R_PORT`: Defines the port over which the R2R process is served. -- `R2R_HOST`: Defines the host address over which the R2R process is served. -- `HATCHET_CLIENT_TOKEN`: API token for Hatchet orchestration service (required for full R2R installation). -- `UNSTRUCTURED_API_KEY`: API key for Unstructured.io document parsing service (required for full R2R installation). - -For more information on these terms and settings, please refer to the relevant sections of the documentation: - -- [Installation](/documentation/installation) -- [Configuration](/documentation/configuration) -- [Ingestion](/documentation/configuration/ingestion/overview) -- [Search](/cookbooks/hybrid-search) -- [Knowledge Graph](/cookbooks/graphrag) -- [RAG Agent](/cookbooks/agent) -- [Collections](/cookbooks/collections) diff --git a/docs/documentation/installation/full/docker.mdx b/docs/documentation/installation/full/docker.mdx deleted file mode 100644 index 3bcfb329f..000000000 --- a/docs/documentation/installation/full/docker.mdx +++ /dev/null @@ -1,82 +0,0 @@ -This installation guide is for R2R Core. For solo developers or teams prototyping, we highly recommend starting with R2R Light. - - -This guide will walk you through installing and running R2R using Docker, which is the quickest and easiest way to get started. - -## Prerequisites - -- Docker installed on your system. If you haven't installed Docker yet, please refer to the [official Docker installation guide](https://docs.docker.com/engine/install/). - -## Install the R2R CLI & Python SDK - -First, install the R2R CLI and Python SDK: - -```bash -pip install r2r -``` - - -We are actively developing a distinct CLI binary for R2R for easier installation. Please reach out if you have any specific needs or feature requests. - -## Start R2R with Docker - -The full R2R installation does not use the default [`r2r.toml`](https://github.com/SciPhi-AI/R2R/blob/main/py/r2r.toml), instead it provides overrides through a pre-built custom configuration, [`full.toml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/configs/full.toml). - - - - To start R2R with OpenAI as the default LLM inference and embedding provider: - ```bash - # Set cloud LLM settings - export OPENAI_API_KEY=sk-... - - r2r serve --docker --full - ``` - [Refer here](/documentation/configuration/llm) for more information on how to configure various LLM providers. - - - To start R2R with your local computer as the default LLM inference provider: - ```bash - r2r serve --docker --full --config-name=full_local_llm - ``` - Then, in a separate terminal you will need to run Ollama to provide completions: - ```bash - ollama pull llama3.1 - ollama pull mxbai-embed-large - ollama serve - ``` - The code above assumes that Ollama has already been installed. If you have not yet done so, then refer to the official Ollama webpage [for installation instructions](https://ollama.com/download). For more information on local installation, [refer here](/documentation/local-rag). - - - R2R offers flexibility in selecting and configuring LLMs, allowing you to optimize your RAG pipeline for various use cases. Execute the command below run deploy R2R with your own custom configuration: - ```bash - r2r serve --config-path=/abs/path/to/my_r2r.toml - ``` - - Learn in detail how to [configure your deployment here](/documentation/configuration). 
- - - - -The above command will automatically pull the necessary Docker images and start all the required containers, including `R2R`, `Hatchet`, and `Postgres+pgvector`. The required additional services come bundled into the full R2R Docker Compose by default. - -The end result is a live server at http://localhost:7272 serving the [R2R API](/api-reference/introduction). - -In addition to launching a RESTful API, the R2R Docker also launches applications at `localhost:7273` and `localhost:7274`, which you can [read more about here](/cookbooks/application). - -### Stopping R2R - -Safely stop your system by running `r2r docker-down` to avoid potential shutdown complications. - -## Next Steps - -After successfully installing R2R: - -1. **Verify Installation**: Ensure all components are running correctly by accessing the R2R API at http://localhost:7272/v2/health. - -2. **Quick Start**: Follow our [R2R Quickstart Guide](/documentation/quickstart) to set up your first RAG application. - -3. **In-Depth Tutorial**: For a more comprehensive understanding, work through our [R2R Walkthrough](/cookbooks/walkthrough). - -4. **Customize Your Setup**: Configure R2R components with the [Configuration Guide](/documentation/configuration). - -If you encounter any issues during installation or setup, please use our [Discord community](https://discord.gg/p6KqD2kjtB) or [GitHub repository](https://github.com/SciPhi-AI/R2R) to seek assistance. diff --git a/docs/documentation/installation/full/local-system.mdx b/docs/documentation/installation/full/local-system.mdx deleted file mode 100644 index 89470f2d7..000000000 --- a/docs/documentation/installation/full/local-system.mdx +++ /dev/null @@ -1,144 +0,0 @@ -This installation guide is for R2R Core. For solo developers or teams prototyping, we highly recommend starting with R2R Light. -# R2R Local System Installation - -This guide will walk you through installing and running R2R on your local system without using Docker. This method allows for more customization and control over individual components. - - - - Local installation of R2R Core is challenging due to the numerous services it integrates. We strongly recommend using Docker to get started quickly. - - If you choose to proceed with a local installation, be prepared to set up and configure the following services: - - 1. **Postgres with pgvector**: A relational database with vector storage capabilities. - 2. **Unstructured.io**: A complex system for file ingestion. - 3. **Hatchet**: A RabbitMQ-based orchestration system. - - Alternatively, you can use cloud versions of these services, but you'll be responsible for enrolling in them and providing the necessary environment variables. - - Each of these components has its own requirements, potential compatibility issues, and configuration complexities. Debugging issues in a local setup can be significantly more challenging than using a pre-configured Docker environment. - - - - Unless you have a specific need for a local installation and are comfortable with advanced system configuration, we highly recommend using the Docker setup method for a smoother experience.
-## Prerequisites - -Before starting, ensure you have the following installed and/or available in the cloud: -- Python 3.12 or higher -- pip (Python package manager) -- Git -- Postgres + pgvector -- Unstructured file ingestion -- Hatchet workflow orchestration - -## Install the R2R CLI & Python SDK - -First, install the R2R CLI and Python SDK: - -```bash -pip install 'r2r[core ingestion-bundle hatchet]' -``` - -## Environment Setup - -R2R requires connections to various services. Set up the following environment variables based on your needs: - - - - Note, cloud providers are optional as R2R can be run entirely locally. - ```bash - # Set cloud LLM settings - export OPENAI_API_KEY=sk-... - # export ANTHROPIC_API_KEY=... - # ... - ``` - - - R2R uses [Hatchet](https://docs.hatchet.run/home) for orchestration. When building R2R locally, you will need to either (a) register for [Hatchet's cloud service](https://cloud.onhatchet.run/auth/login/) or (b) install it [locally following their instructions](https://docs.hatchet.run/self-hosting/docker-compose), or (c) deploy the R2R docker and connect with the internally provisioned Hatchet service. - ```bash - # Set cloud LLM settings - export HATCHET_CLIENT_TOKEN=... - ``` - - - With R2R you can connect to your own instance of Postgres+pgvector or a remote cloud instance. - ```bash - # Set Postgres+pgvector settings - export R2R_POSTGRES_USER=$YOUR_POSTGRES_USER - export R2R_POSTGRES_PASSWORD=$YOUR_POSTGRES_PASSWORD - export R2R_POSTGRES_HOST=$YOUR_POSTGRES_HOST - export R2R_POSTGRES_PORT=$YOUR_POSTGRES_PORT - export R2R_POSTGRES_DBNAME=$YOUR_POSTGRES_DBNAME - export R2R_PROJECT_NAME=$YOUR_PROJECT_NAME # see note below - ``` - - The `R2R_PROJECT_NAME` environment variable defines the tables within your Postgres database where the selected R2R project resides. If the specified tables do not exist then they will be created by R2R during initialization. - - - - By default, R2R uses [unstructured.io](https://docs.unstructured.io/welcome) to handle file ingestion. Unstructured can be: - - 1. Installed locally by following their [documented instructions](https://docs.unstructured.io/open-source/introduction/overview) - 2. Connected to via the cloud - - For cloud connections, set the following environment variable: - - ```bash - # Set Unstructured API key for cloud usage - export UNSTRUCTURED_API_KEY=your_api_key_here - ``` - - Alternatively, R2R has its own lightweight end-to-end ingestion, which may be more appropriate in some situations. This can be specified in your server configuration by choosing `r2r` as your ingestion provider. - - - -## Running R2R - -The full R2R installation does not use the default [`r2r.toml`](https://github.com/SciPhi-AI/R2R/blob/main/py/r2r.toml), instead it provides overrides through a pre-built custom configuration, [`full.toml`](https://github.com/SciPhi-AI/R2R/blob/main/py/core/configs/full.toml). - -After setting up your environment, you can start R2R using the following command: - -```bash -# requires services for unstructured, hatchet, postgres -r2r serve --config-name=full -``` - -For local LLM usage: - -```bash -r2r serve --config-name=full_local_llm -``` - -## Python Development Mode - -For those looking to develop R2R locally: - -1. Install Poetry: Follow instructions on the [official Poetry website](https://python-poetry.org/docs/#installation). - -2. Clone and install dependencies: - ```bash - git clone git@github.com:SciPhi-AI/R2R.git - cd R2R/py - poetry install -E "core ingestion-bundle hatchet" - ``` - -3. 
Setup environment: - Follow the steps listed in the Environment Setup section above. Additionally, you may introduce a local .env file to make development easier, and you can customize your local `r2r.toml` to suit your specific needs. - -4. Start your server: - ```bash - poetry run r2r serve --config-name=core - ``` - -## Next Steps - -After successfully installing R2R: - -1. **Verify Installation**: Ensure all components are running correctly by accessing the R2R API at http://localhost:7272/v2/health. - -2. **Quick Start**: Follow our [R2R Quickstart Guide](/documentation/quickstart) to set up your first RAG application. - -3. **In-Depth Tutorial**: For a more comprehensive understanding, work through our [R2R Walkthrough](/cookbooks/walkthrough). - -4. **Customize Your Setup**: Configure R2R components with the [Configuration Guide](/documentation/configuration). - -If you encounter any issues during installation or setup, please use our [Discord community](https://discord.gg/p6KqD2kjtB) or [GitHub repository](https://github.com/SciPhi-AI/R2R) to seek assistance. diff --git a/docs/documentation/installation/light/docker.mdx b/docs/documentation/installation/light/docker.mdx deleted file mode 100644 index 203722a20..000000000 --- a/docs/documentation/installation/light/docker.mdx +++ /dev/null @@ -1,81 +0,0 @@ -# R2R Docker Installation - -This guide will walk you through installing and running R2R using Docker, which is the quickest and easiest way to get started. - -## Prerequisites - -- Docker installed on your system. If you haven't installed Docker yet, please refer to the [official Docker installation guide](https://docs.docker.com/engine/install/). - -## Install the R2R CLI & Python SDK - -First, install the R2R CLI and Python SDK: - -```bash -pip install r2r -``` - -We are actively developing a distinct CLI binary for R2R for easier installation. Please reach out if you have any specific needs or feature requests. - -## Start R2R with Docker - - - - To start R2R with OpenAI as the default LLM inference and embedding provider: - ```bash - # Set cloud LLM settings - export OPENAI_API_KEY=sk-... - - r2r serve --docker - ``` - [Refer here](/documentation/configuration/llm) for more information on how to configure various LLM providers. - - - To start R2R with your local computer as the default LLM inference provider: - ```bash - r2r serve --docker --config-name=local_llm - ``` - Then, in a separate terminal you will need to run Ollama to provide completions: - ```bash - ollama pull llama3.1 - ollama pull mxbai-embed-large - ollama serve - ``` - The code above assumes that Ollama has already been installed. If you have not yet done so, then refer to the official Ollama webpage [for installation instructions](https://ollama.com/download). For more information on local installation, [refer here](/documentation/local-rag). - - - R2R offers flexibility in selecting and configuring LLMs, allowing you to optimize your RAG pipeline for various use cases. Execute the command below run deploy R2R with your own custom configuration: - ```bash - r2r serve --config-path=/abs/path/to/my_r2r.toml - ``` - - Learn in detail how to [configure your deployment here](/documentation/configuration). - - - - -Postgres comes bundled into the R2R Docker by default. - - -The above command will automatically pull the necessary Docker images and start all the required containers, including `R2R`, `Postgres+pgvector`. 
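Before relying on the server, it can be worth confirming that the containers actually started; a quick check might look like the following (a sketch; the exact container names depend on your Docker Compose project, and the health route is the same one referenced under Next Steps below):

```bash
# Optional sanity check after `r2r serve --docker`:
docker ps --filter "name=r2r"          # the R2R and Postgres+pgvector containers should be listed
curl http://localhost:7272/v2/health   # the R2R API should respond on port 7272
```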
- -The end result is a live server at http://localhost:7272 serving the [R2R API](/api-reference/introduction). - -In addition to launching a RESTful API, the R2R Docker also launches a applications at `localhost:7273` and `localhost:7274`, which you can [read more about here](/cookbooks/application). - -### Stopping R2R - -Safely stop your system by running `r2r docker-down` to avoid potential shutdown complications. - -## Next Steps - -After successfully installing R2R: - -1. **Verify Installation**: Ensure all components are running correctly by accessing the R2R API at http://localhost:7272/v2/health. - -2. **Quick Start**: Follow our [R2R Quickstart Guide](/documentation/quickstart) to set up your first RAG application. - -3. **In-Depth Tutorial**: For a more comprehensive understanding, work through our [R2R Walkthrough](/cookbooks/walkthrough). - -4. **Customize Your Setup**: Configure R2R components with the [Configuration Guide](/documentation/configuration). - -If you encounter any issues during installation or setup, please use our [Discord community](https://discord.gg/p6KqD2kjtB) or [GitHub repository](https://github.com/SciPhi-AI/R2R) to seek assistance. diff --git a/docs/documentation/installation/light/local-system.mdx b/docs/documentation/installation/light/local-system.mdx deleted file mode 100644 index 7d152015b..000000000 --- a/docs/documentation/installation/light/local-system.mdx +++ /dev/null @@ -1,106 +0,0 @@ -# R2R Local System Installation - -This guide will walk you through installing and running R2R on your local system without using Docker. This method allows for more customization and control over individual components. - - -## Prerequisites - -Before starting, ensure you have the following installed and/or available in the cloud: -- Python 3.12 or higher -- pip (Python package manager) -- Git -- Postgres + pgvector - -## Install the R2R CLI and extra dependencies - -First, install the R2R CLI with the additional `light` dependencies: - -```bash -pip install 'r2r[core,ingestion-bundle]' -``` - -The `core` and `ingestion-bundle` dependencies, combined with a Postgres database, provide the necessary components to deploy a user-facing R2R application into production. - -If you need advanced features like orchestration or parsing with `Unstructured.io` then refer to the full installation . - -## Environment Setup - -R2R requires connections to various services. Set up the following environment variables based on your needs: - - - - Note, cloud providers are optional as R2R can be run entirely locally. - ```bash - # Set cloud LLM settings - export OPENAI_API_KEY=sk-... - # export ANTHROPIC_API_KEY=... - # ... - ``` - - - With R2R you can connect to your own instance of Postgres+pgvector or a remote cloud instance. - ```bash - # Set Postgres+pgvector settings - export R2R_POSTGRES_USER=$YOUR_POSTGRES_USER - export R2R_POSTGRES_PASSWORD=$YOUR_POSTGRES_PASSWORD - export R2R_POSTGRES_HOST=$YOUR_POSTGRES_HOST - export R2R_POSTGRES_PORT=$YOUR_POSTGRES_PORT - export R2R_POSTGRES_DBNAME=$YOUR_POSTGRES_DBNAME - export R2R_PROJECT_NAME=$YOUR_PROJECT_NAME # see note below - ``` - - The `R2R_PROJECT_NAME` environment variable defines the tables within your Postgres database where the selected R2R project resides. If the required tables for R2R do not exist then they will be created by R2R during initialization. - - If you are unfamiliar with Postgres then Supabase's free cloud offering is a good place to start. 
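Before starting the server, you may want to confirm that the Postgres settings exported above work and that pgvector is available. A minimal sketch, assuming `psql` is installed locally and the connecting role is allowed to create extensions:

```bash
# Connect with the same variables R2R will use and make sure pgvector can be enabled.
PGPASSWORD="$R2R_POSTGRES_PASSWORD" psql \
  -h "$R2R_POSTGRES_HOST" -p "$R2R_POSTGRES_PORT" \
  -U "$R2R_POSTGRES_USER" -d "$R2R_POSTGRES_DBNAME" \
  -c "CREATE EXTENSION IF NOT EXISTS vector;"
```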
- - - - -## Running R2R - -After setting up your environment, you can start R2R using the following command: - -```bash -r2r serve -``` - -For local LLM usage: - -```bash -r2r serve --config-name=local_llm -``` - -## Python Development Mode - -For those looking to develop R2R locally: - -1. Install Poetry: Follow instructions on the [official Poetry website](https://python-poetry.org/docs/#installation). - -2. Clone and install dependencies: - ```bash - git clone git@github.com:SciPhi-AI/R2R.git - cd R2R/py - poetry install -E "core ingestion-bundle" - ``` - -3. Setup environment: - Follow the steps listed in the Environment Setup section above. Additionally, you may introduce a local .env file to make development easier, and you can customize your local `r2r.toml` to suit your specific needs. - -4. Start your server: - ```bash - poetry run r2r serve - ``` - -## Next Steps - -After successfully installing R2R: - -1. **Verify Installation**: Ensure all components are running correctly by accessing the R2R API at http://localhost:7272/v2/health. - -2. **Quick Start**: Follow our [R2R Quickstart Guide](/documentation/quickstart) to set up your first RAG application. - -3. **In-Depth Tutorial**: For a more comprehensive understanding, work through our [R2R Walkthrough](/cookbooks/walkthrough). - -4. **Customize Your Setup**: Configure R2R components with the [Configuration Guide](/documentation/configuration). - -If you encounter any issues during installation or setup, please use our [Discord community](https://discord.gg/p6KqD2kjtB) or [GitHub repository](https://github.com/SciPhi-AI/R2R) to seek assistance. diff --git a/docs/documentation/installation/overview.mdx b/docs/documentation/installation/overview.mdx deleted file mode 100644 index 2aad95618..000000000 --- a/docs/documentation/installation/overview.mdx +++ /dev/null @@ -1,22 +0,0 @@ -# R2R Installation - -Welcome to the R2R installation guide. R2R offers powerful features for your RAG applications, including: - -- **Flexibility**: Run with cloud-based LLMs or entirely on your local machine -- **State-of-the-Art Tech**: Advanced RAG techniques like [hybrid search](/cookbooks/hybrid-search), [GraphRAG](/cookbooks/graphrag), and [agentic RAG](/cookbooks/agent). -- **Auth & Orchestration**: Production must-haves like [auth](/cookbooks/user-auth) and [ingestion orchestration](/cookbooks/orchestration). - -## Choose Your System - - - - - - A lightweight version of R2R, **perfect for quick prototyping and simpler applications**. Some advanced features, like orchestration and advanced document parsing, may not be available. - - - The full-featured R2R system, ideal **for advanced use cases and production deployments**. Includes all components and capabilities, such as **Hatchet** for orchestration and **Unstructured** for parsing. - - - -Choose the system that best aligns with your requirements and proceed with the installation guide. diff --git a/docs/documentation/js-sdk/auth.mdx b/docs/documentation/js-sdk/auth.mdx deleted file mode 100644 index a31dc4293..000000000 --- a/docs/documentation/js-sdk/auth.mdx +++ /dev/null @@ -1,337 +0,0 @@ ---- -title: 'Authentication' -description: 'Manage users in R2R with built-in authentication' ---- - - -Occasionally this SDK documentation falls out of date, cross-check with the automatcially generated API Reference documentation for the latest parameters. 
- - - -## User Authentication and Management - -R2R provides a comprehensive set of user authentication and management features, allowing you to implement secure and feature-rich authentication systems in your applications. - -### User Registration - -To register a new user: - -```javascript -const registerResponse = await client.register("user@example.com", "password123"); -``` - - - - - ```javascript - { - results: { - email: 'user@example.com', - id: 'bf417057-f104-4e75-8579-c74d26fcbed3', - hashed_password: '$2b$12$p6a9glpAQaq.4uzi4gXQru6PN7WBpky/xMeYK9LShEe4ygBf1L.pK', - is_superuser: false, - is_active: true, - is_verified: false, - verification_code_expiry: null, - name: null, - bio: null, - profile_picture: null, - created_at: '2024-07-16T22:53:47.524794Z', - updated_at: '2024-07-16T22:53:47.524794Z' - } - } - ``` - - - - -### Email Verification - -If email verification is enabled, verify a user's email: - -```javascript -const verifyResponse = await client.verifyEmail("verification_code_here"); -``` - - - - - ```javascript - { - results: { - message: "Email verified successfully" - } - } - ``` - - - - -### User Login - -To log in and obtain access tokens: - -```javascript -const loginResponse = await client.login("user@example.com", "password123"); -``` - - - - - ```javascript - { - results: { - access_token: { - token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', - token_type: 'access' - }, - refresh_token: { - token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', - token_type: 'refresh' - } - } - } - ``` - - - - -### Get Current User Info - -Retrieve information about the currently authenticated user: - -```javascript -const user_info = client.user() -``` - - - - - ```python - { - 'results': { - 'email': 'user@example.com', - 'id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', - 'hashed_password': 'null', - 'is_superuser': False, - 'is_active': True, - 'is_verified': True, - 'verification_code_expiry': None, - 'name': None, - 'bio': None, - 'profile_picture': None, - 'created_at': '2024-07-16T23:06:42.123303Z', - 'updated_at': '2024-07-16T23:22:48.256239Z' - } - } - ``` - - - - -### Refresh Access Token - -Refresh an expired access token: - -```javascript -const refreshResponse = await client.refreshAccessToken(); -``` - - - - - ```javascript - { - results: { - access_token: { - token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', - token_type: 'access' - }, - refresh_token: { - token: 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', - token_type: 'refresh' - } - } - } - ``` - - - - -### Change Password - -Change the user's password: - -```javascript -const changePasswordResult = await client.changePassword("password123", "new_password"); -``` - - - - - ```javascript - { - results: { - message: "Password changed successfully" - } - } - ``` - - - - -### Request Password Reset - -Request a password reset for a user: - -```javascript -const resetRequestResult = await client.requestPasswordReset("user@example.com"); -``` - - - - - ```javascript - { - results: { - message: "If the email exists, a reset link has been sent" - } - } - ``` - - - - -### Confirm Password Reset - -Confirm a password reset using the reset token: - -```javascript -const resetConfirmResult = await client.confirmPasswordReset("reset_token_here", "new_password"); -``` - - - - - ```javascript - { - results: { - message: "Password reset successfully" - } - } - ``` - - - - -### Update User Profile - -Update the user's profile information: - -```javascript -// keeping the user's email as is: -const updateResult = 
client.updateUser(undefined, "John Doe", "R2R enthusiast"); -``` - - - - - ```python - { - 'results': { - 'email': 'user@example.com', - 'id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', - 'hashed_password': 'null', - 'is_superuser': False, - 'is_active': True, - 'is_verified': True, - 'verification_code_expiry': None, - 'name': 'John Doe', - 'bio': 'R2R enthusiast', - 'profile_picture': None, - 'created_at': '2024-07-16T23:06:42.123303Z', - 'updated_at': '2024-07-16T23:22:48.256239Z' - } - } - ``` - - - - -### Delete User Account - -Delete the user's account: - -```javascript -const user_id = register_response["results"]["id"] # input unique id here -const delete_result = client.delete_user(user_id, "password123") -``` - - - - - ```python - { - 'results': { - 'message': 'User account deleted successfully' - } - } - ``` - - - - -### User Logout - -Log out and invalidate the current access token: - -```javascript -const logoutResponse = await client.logout(); -``` - - - - - ```javascript - { - results: { - message: "Logged out successfully" - } - } - ``` - - - - -### Superuser Capabilities - -Superusers have additional privileges, including access to system-wide operations and sensitive information. To use superuser capabilities, authenticate as a superuser or the default admin: - -```javascript -// Login as admin -const loginResult = await client.login("admin@example.com", "admin_password"); - -// Access superuser features -const usersOverview = await client.usersOverview(); -const logs = await client.logs(); -const analyticsResult = await client.analytics( - { filters: { all_latencies: "search_latency" } }, - { analysis_types: { search_latencies: ["basic_statistics", "search_latency"] } } -); -``` - - -Superuser actions should be performed with caution and only by authorized personnel. Ensure proper security measures are in place when using superuser capabilities. - - -## Security Considerations - -When implementing user authentication, consider the following best practices: - -1. Always use HTTPS in production to encrypt data in transit. -2. Implement rate limiting to protect against brute-force attacks. -3. Use secure password hashing (R2R uses bcrypt by default). -4. Consider implementing multi-factor authentication (MFA) for enhanced security. -5. Conduct regular security audits of your authentication system. - -For more advanced use cases or custom implementations, refer to the R2R documentation or reach out to the community for support. diff --git a/docs/documentation/js-sdk/collections.mdx b/docs/documentation/js-sdk/collections.mdx deleted file mode 100644 index 3a42fad00..000000000 --- a/docs/documentation/js-sdk/collections.mdx +++ /dev/null @@ -1,423 +0,0 @@ ---- - -title: 'Collection Management' -description: 'Manage collections in R2R' ---- - - -Occasionally this SDK documentation falls out of date, cross-check with the automatically generated API Reference documentation for the latest parameters. - - -A collection in R2R is a logical grouping of users and documents that allows for efficient access control and organization. Collections enable you to manage permissions and access to documents at a collection level, rather than individually. - -R2R provides a comprehensive set of collection features, allowing you to implement efficient access control and organization of users and documents in your applications. - - -Collection permissioning in R2R is still under development and as a result the API will likely evolve. 
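To make the individual operations below concrete, here is a minimal end-to-end sketch: create a collection, grant a user access, and share a document with it. The IDs are the placeholder values used throughout this page, not real records; each call is covered in detail in the sections that follow.

```javascript
// End-to-end sketch with placeholder IDs; replace them with your own records.
const createResponse = await client.createCollection(
  "Marketing Team",
  "Collection for marketing department"
);
const collectionId = createResponse.results.collection_id;

// Give an existing user access to everything in the collection.
await client.addUserToCollection("456e789f-g01h-34i5-j678-901234567890", collectionId);

// Share an already-ingested document with the collection's members.
await client.assignDocumentToCollection("789g012j-k34l-56m7-n890-123456789012", collectionId);
```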
- - -## Collection Creation and Management - -### Create a Collection - -Create a new collection with a name and optional description: - -```javascript -const createCollectionResponse = await client.createCollection( - "Marketing Team", - "Collection for marketing department" -); -const collectionId = createCollectionResponse.results.collection_id; // '123e4567-e89b-12d3-a456-426614174000' -``` - - - - - ```javascript - { - results: { - collection_id: '123e4567-e89b-12d3-a456-426614174000', - name: 'Marketing Team', - description: 'Collection for marketing department', - created_at: '2024-07-16T22:53:47.524794Z', - updated_at: '2024-07-16T22:53:47.524794Z' - } - } - ``` - - - - -### Get Collection Details - -Retrieve details about a specific collection: - -```javascript -const collectionDetails = await client.getCollection(collectionId); -``` - - - - - ```javascript - { - results: { - collection_id: '123e4567-e89b-12d3-a456-426614174000', - name: 'Marketing Team', - description: 'Collection for marketing department', - created_at: '2024-07-16T22:53:47.524794Z', - updated_at: '2024-07-16T22:53:47.524794Z' - } - } - ``` - - - - -### Update a Collection - -Update a collection's name or description: - -```javascript -const updateResult = await client.updateCollection( - collectionId, - "Updated Marketing Team", - "New description for marketing team" -); -``` - - - - - ```javascript - { - results: { - collection_id: '123e4567-e89b-12d3-a456-426614174000', - name: 'Updated Marketing Team', - description: 'New description for marketing team', - created_at: '2024-07-16T22:53:47.524794Z', - updated_at: '2024-07-16T23:15:30.123456Z' - } - } - ``` - - - - -### List Collections - -Get a list of all collections: - -```javascript -const collectionsList = await client.listCollections(); -``` - - - - - ```javascript - { - results: [ - { - collection_id: '123e4567-e89b-12d3-a456-426614174000', - name: 'Updated Marketing Team', - description: 'New description for marketing team', - created_at: '2024-07-16T22:53:47.524794Z', - updated_at: '2024-07-16T23:15:30.123456Z' - }, - // ... other collections ... - ] - } - ``` - - - - -## User Management in Collections - -### Add User to Collection - -Add a user to a collection: - -```javascript -const userId = '456e789f-g01h-34i5-j678-901234567890'; // This should be a valid user ID -const addUserResult = await client.addUserToCollection(userId, collectionId); -``` - - - - - ```javascript - { - results: { - message: 'User successfully added to the collection' - } - } - ``` - - - - -### Remove User from Collection - -Remove a user from a collection: - -```javascript -const removeUserResult = await client.removeUserFromCollection(userId, collectionId); -``` - - - - - ```javascript - { - results: { - message: 'User successfully removed from the collection' - } - } - ``` - - - - -### List Users in Collection - -Get a list of all users in a specific collection: - -```javascript -const usersInCollection = await client.getUsersInCollection(collectionId); -``` - - - - - ```javascript - { - results: [ - { - user_id: '456e789f-g01h-34i5-j678-901234567890', - email: 'user@example.com', - name: 'John Doe', - // ... other user details ... - }, - // ... other users ... 
- ] - } - ``` - - - - -### Get User's Collections - -Get all collections that a user is a member of: - -```javascript -const userCollections = await client.getCollectionsForUser(userId); -``` - - - - - ```javascript - { - results: [ - { - collection_id: '123e4567-e89b-12d3-a456-426614174000', - name: 'Updated Marketing Team', - // ... other collection details ... - }, - // ... other collections ... - ] - } - ``` - - - - -## Document Management in Collections - -### Assign Document to Collection - -Assign a document to a collection: - -```javascript -const documentId = '789g012j-k34l-56m7-n890-123456789012'; // Must be a valid document ID -const assignDocResult = await client.assignDocumentToCollection(documentId, collectionId); -``` - - - - - ```javascript - { - results: { - message: 'Document successfully assigned to the collection' - } - } - ``` - - - - -### Remove Document from Collection - -Remove a document from a collection: - -```javascript -const removeDocResult = await client.removeDocumentFromCollection(documentId, collectionId); -``` - - - - - ```javascript - { - results: { - message: 'Document successfully removed from the collection' - } - } - ``` - - - - -### List Documents in Collection - -Get a list of all documents in a specific collection: - -```javascript -const docsInCollection = await client.getDocumentsInCollection(collectionId); -``` - - - - - ```javascript - { - results: [ - { - document_id: '789g012j-k34l-56m7-n890-123456789012', - title: 'Marketing Strategy 2024', - // ... other document details ... - }, - // ... other documents ... - ] - } - ``` - - - - -### Get Document's Collections - -Get all collections that a document is assigned to: - -```javascript -const documentCollections = await client.getDocumentCollections(documentId); -``` - - - - - ```javascript - { - results: [ - { - collection_id: '123e4567-e89b-12d3-a456-426614174000', - name: 'Updated Marketing Team', - // ... other collection details ... - }, - // ... other collections ... - ] - } - ``` - - - - -## Advanced Collection Management - -### Collection Overview - -Get an overview of collections, including user and document counts: - -```javascript -const collectionsOverview = await client.collectionsOverview(); -``` - - - - - ```javascript - { - results: [ - { - collection_id: '123e4567-e89b-12d3-a456-426614174000', - name: 'Updated Marketing Team', - description: 'New description for marketing team', - user_count: 5, - document_count: 10, - created_at: '2024-07-16T22:53:47.524794Z', - updated_at: '2024-07-16T23:15:30.123456Z' - }, - // ... other collections ... 
- ] - } - ``` - - - - -### Delete a Collection - -Delete a collection: - -```javascript -const deleteResult = await client.deleteCollection(collectionId); -``` - - - - - ```javascript - { - results: { - message: 'Collection successfully deleted' - } - } - ``` - - - - -## Pagination and Filtering - -Many collection-related methods support pagination and filtering: - -```javascript -// List collections with pagination -const paginatedCollections = await client.listCollections(10, 20); - -// Get users in a collection with pagination -const paginatedUsers = await client.getUsersInCollection(collectionId, 5, 10); - -// Get documents in a collection with pagination -const paginatedDocs = await client.getDocumentsInCollection(collectionId, 0, 50); - -// Get collections overview with specific collection IDs -const specificCollectionsOverview = await client.collectionsOverview(['id1', 'id2', 'id3']); -``` - -## Security Considerations - -When implementing collection permissions, consider the following security best practices: - -1. Always use HTTPS in production to encrypt data in transit. -2. Implement the principle of least privilege by assigning the minimum necessary permissions to users and collections. -3. Regularly audit collection memberships and document assignments. -4. Ensure that only authorized users (e.g., admins) can perform collection management operations. -5. Implement comprehensive logging for all collection-related actions. -6. Consider implementing additional access controls or custom roles within your application logic for more fine-grained permissions. - -For more advanced use cases or custom implementations, refer to the R2R documentation or reach out to the community for support. diff --git a/docs/documentation/js-sdk/conversations.mdx b/docs/documentation/js-sdk/conversations.mdx deleted file mode 100644 index c0b0298b0..000000000 --- a/docs/documentation/js-sdk/conversations.mdx +++ /dev/null @@ -1,183 +0,0 @@ ---- -title: 'Conversations' -description: 'Managing conversations with R2R.' ---- - - -This feature is currently in beta. Functionality may change, and we value your feedback around these features. - - - -Occasionally this SDK documentation falls out of date, cross-check with the automatically generated API Reference documentation for the latest parameters. - - -## Conversation Management - -### Get Conversations Overview - -Retrieve an overview of existing conversations: - -```javascript -const offset = 0; -const limit = 10; -const overviewResponse = await client.conversationsOverview(offset, limit); -``` - - - - - The response containing an overview of conversations. - - - - - - The offset to start listing conversations from. - - - - The maximum number of conversations to return. - - -### Get Conversation - -Fetch a specific conversation by its UUID: - -```javascript -const conversationId = '123e4567-e89b-12d3-a456-426614174000'; -const conversation = await client.getConversation(conversationId); -``` - - - - - The response containing the requested conversation details. - - - - - - The UUID of the conversation to retrieve. - - -### Create Conversation - -Create a new conversation: - -```javascript -const newConversation = await client.createConversation(); -``` - - - - - The response containing details of the newly created conversation. - - - - -### Add Message - -Add a message to an existing conversation: - -```javascript -const conversationId = '123e4567-e89b-12d3-a456-426614174000'; -const message = { text: 'Hello, world!' 
}; -const parentId = '98765432-e21b-12d3-a456-426614174000'; -const metadata = { key: 'value' }; - -const addMessageResponse = await client.addMessage(conversationId, message, parentId, metadata); -``` - - - - - The response after adding the message to the conversation. - - - - - - The UUID of the conversation to add the message to. - - - - The message object to add to the conversation. - - - - An optional UUID of the parent message. - - - - An optional metadata object for the message. - - -### Update Message - -Update an existing message in a conversation: - -```javascript -const messageId = '98765432-e21b-12d3-a456-426614174000'; -const updatedMessage = { text: 'Updated message content' }; - -const updateMessageResponse = await client.updateMessage(messageId, updatedMessage); -``` - - - - - The response after updating the message. - - - - - - The UUID of the message to update. - - - - The updated message object. - - -### Get Branches Overview - -Retrieve an overview of branches in a conversation: - -```javascript -const conversationId = '123e4567-e89b-12d3-a456-426614174000'; -const branchesOverview = await client.branchesOverview(conversationId); -``` - - - - - The response containing an overview of branches in the conversation. - - - - - - The UUID of the conversation to get branches for. - - -### Delete Conversation - -Delete a conversation by its UUID: - -```javascript -const conversationId = '123e4567-e89b-12d3-a456-426614174000'; -const deleteResponse = await client.deleteConversation(conversationId); -``` - - - - - The response after deleting the conversation. - - - - - - The UUID of the conversation to delete. - diff --git a/docs/documentation/js-sdk/ingestion.mdx b/docs/documentation/js-sdk/ingestion.mdx deleted file mode 100644 index c8c419b49..000000000 --- a/docs/documentation/js-sdk/ingestion.mdx +++ /dev/null @@ -1,489 +0,0 @@ ---- -title: 'Ingestion' -description: 'Ingesting files with R2R.' ---- - - -This SDK documentation is periodically updated. For the latest parameter details, please cross-reference with the API Reference documentation. - - -Inside R2R, `ingestion` refers to the complete pipeline for processing input data: -- Parsing files into text -- Chunking text into semantic units -- Generating embeddings -- Storing data for retrieval - -Ingested files are stored with an associated document identifier as well as a user identifier to enable comprehensive management. - -## Document Ingestion and Management - - -R2R has recently expanded the available options for ingesting files using multimodal foundation models. In addition to using such models by default for images, R2R can now use them on PDFs, [like it is shown here](https://github.com/getomni-ai/zerox), by passing the following in your ingestion configuration: - -```json -{ - "ingestion_config": { - "parser_overrides": { - "pdf": "zerox" - } - } -} -``` - -We recommend this method for achieving the highest quality ingestion results. 
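The override can also be supplied at request time through the SDK's `ingestion_config` option rather than in a server-side config file. A sketch using the `ingestFiles` call described below (the file path is a placeholder, and the per-request form of `parser_overrides` is an assumption; cross-check the API reference):

```javascript
// Sketch (assumption): route PDFs through the multimodal `zerox` parser for this
// request only, by passing parser_overrides inside the runtime ingestion_config.
const zeroxIngest = await client.ingestFiles(
  [{ path: 'path/to/report.pdf', name: 'report.pdf' }],
  {
    ingestion_config: {
      parser_overrides: { pdf: "zerox" }
    }
  }
);
```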
- - -### Ingest Files - -Ingest files or directories into your R2R system: - -```javascript -const files = [ - { path: 'path/to/file1.txt', name: 'file1.txt' }, - { path: 'path/to/file2.txt', name: 'file2.txt' } -]; -const metadatas = [ - { key1: 'value1' }, - { key2: 'value2' } -]; - -// Runtime chunking configuration -const ingestResponse = await client.ingestFiles(files, { - metadatas, - user_ids: ['user-id-1', 'user-id-2'], - ingestion_config: { - provider: "unstructured_local", // Local processing - strategy: "auto", // Automatic processing strategy - chunking_strategy: "by_title", // Split on title boundaries - new_after_n_chars: 256, // Start new chunk (soft limit) - max_characters: 512, // Maximum chunk size (hard limit) - combine_under_n_chars: 64, // Minimum chunk size - overlap: 100, // Character overlap between chunks - } -}); -``` - -[Previous sections remain the same through the Update Files code example, then continuing with:] - - - - - The response from the R2R system after updating the files. - ```bash - { - 'results': { - 'processed_documents': [ - { - 'id': '9f375ce9-efe9-5b57-8bf2-a63dee5f3621', - 'title': 'updated_doc.txt' - } - ], - 'failed_documents': [], - 'skipped_documents': [] - } - } - ``` - - - - - - Array of files to update. - - - - - Document IDs corresponding to files being updated. - - - - Optional metadata for updated files. - - - - Chunking configuration options. - - - - - - Whether or not ingestion runs with orchestration, default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result. - - - -### Update Chunks - -Update the content of an existing chunk in your R2R system: - -```javascript -const documentId = "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"; -const extractionId = "aeba6400-1bd0-5ee9-8925-04732d675434"; - -const updateResponse = await client.updateChunks({ - document_id: documentId, - extraction_id: extractionId, - text: "Updated chunk content...", - metadata: { - source: "manual_edit", - edited_at: "2024-10-24" - } -}); -``` - - - - - The response from the R2R system after updating the chunk. - ```bash - { - 'message': 'Update chunk task queued successfully.', - 'task_id': '7e27dfca-606d-422d-b73f-2d9e138661b4', - 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1' - } - ``` - - - - - - - The ID of the document containing the chunk to update. - - - - The ID of the specific chunk to update. - - - - The new text content to replace the existing chunk text. - - - - An optional metadata object for the updated chunk. If provided, this will replace the existing chunk metadata. - - - - Whether or not the update runs with orchestration, default is `true`. When set to `false`, the update process will run synchronous and directly return the result. - - - - -### Documents Overview - -Retrieve high-level document information: - -```javascript -// Get all documents (paginated) -const documentsOverview = await client.documentsOverview(); - -// Get specific documents -const specificDocs = await client.documentsOverview({ - document_ids: ['doc-id-1', 'doc-id-2'], - offset: 0, - limit: 10 -}); -``` - -Results are restricted to the current user's files unless the request is made by a superuser. 
- - - - - ```bash - [ - { - 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', - 'version': 'v1', - 'size_in_bytes': 73353, - 'ingestion_status': 'success', - 'restructuring_status': 'pending', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'title': 'aristotle.txt', - 'created_at': '2024-07-21T20:09:14.218741Z', - 'updated_at': '2024-07-21T20:09:14.218741Z', - 'metadata': {'title': 'aristotle.txt', 'version': 'v0', 'x': 'y'} - }, - ... - ] - ``` - - - - - - Optional array of document IDs to filter results. - - - - Starting point for pagination, defaults to 0. - - - - Maximum number of results to return, defaults to 100. - - -### Document Chunks - -Fetch and examine chunks for a particular document: - -```javascript -const documentId = '9fbe403b-c11c-5aae-8ade-ef22980c3ad1'; -const chunks = await client.documentChunks( - documentId, - 0, // offset - 100, // limit - false // include_vectors -); -``` - -These chunks represent the atomic units of text after processing. - - - - - ```bash - [ - { - 'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath...', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', - 'extraction_id': 'aeba6400-1bd0-5ee9-8925-04732d675434', - 'fragment_id': 'f48bcdad-4155-52a4-8c9d-8ba06e996ba3', - 'metadata': { - 'title': 'aristotle.txt', - 'version': 'v0', - 'chunk_order': 0, - 'document_type': 'txt', - 'unstructured_filetype': 'text/plain', - 'unstructured_languages': ['eng'] - } - }, - ... - ] - ``` - - - - - - ID of the document to retrieve chunks for. - - - - Starting point for pagination, defaults to 0. - - - - Maximum number of chunks to return, defaults to 100. - - - - Whether to include embedding vectors in response. - - -### Delete Documents - -Delete documents using filters: - -```javascript -const deleteResponse = await client.delete({ - document_id: { - "$eq": "91662726-7271-51a5-a0ae-34818509e1fd" - } -}); - -// Delete multiple documents -const bulkDelete = await client.delete({ - user_id: { - "$in": ["user-1", "user-2"] - } -}); -``` - - - - - ```bash - {'results': {}} - ``` - - - - - - Filter conditions to identify documents for deletion. - - -## Vector Index Management - -### Create Vector Index - - -Vector indices significantly improve search performance for large collections but add overhead for smaller datasets. Only create indices when working with hundreds of thousands of documents or when search latency is critical. - - -Create a vector index for similarity search: - -```javascript -const createResponse = await client.createVectorIndex({ - tableName: "vectors", - indexMethod: "hnsw", - indexMeasure: "cosine_distance", - indexArguments: { - m: 16, // Number of connections - ef_construction: 64 // Build time quality factor - }, - concurrently: true -}); -``` - - - - - ```bash - { - 'message': 'Vector index creation task queued successfully.', - 'task_id': '7d38dfca-606d-422d-b73f-2d9e138661b5' - } - ``` - - - - - - Table to create index on: vectors, entities_document, entities_collection, communities. - - - - Index method: hnsw, ivfflat, or auto. - - - - Distance measure: cosine_distance, l2_distance, or max_inner_product. - - - - Configuration for chosen index method. - - - Number of connections per element (16-64). - - - Size of candidate list during construction (64-200). - - - - - Number of clusters/inverted lists. 
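Indices are not limited to the `vectors` table. A sketch creating an HNSW index over the document-entity embeddings, one of the table names listed above, using the starting parameters recommended in the best-practices section below:

```javascript
// Sketch: HNSW index on the entities_document table, built concurrently
// so that other operations are not blocked during creation.
const entityIndexResponse = await client.createVectorIndex({
  tableName: "entities_document",
  indexMethod: "hnsw",
  indexMeasure: "cosine_distance",
  indexArguments: {
    m: 16,               // connections per element
    ef_construction: 64  // build-time quality factor
  },
  concurrently: true
});
```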
- - - - -### List Vector Indices - -List existing indices: - -```javascript -const indices = await client.listVectorIndices({ - tableName: "vectors" -}); -``` - - - - - ```bash - { - 'indices': [ - { - 'name': 'ix_vector_cosine_ops_hnsw__20241021211541', - 'table': 'vectors', - 'method': 'hnsw', - 'measure': 'cosine_distance' - }, - ... - ] - } - ``` - - - - -### Delete Vector Index - -Remove an existing index: - -```javascript -const deleteResponse = await client.deleteVectorIndex({ - indexName: "ix_vector_cosine_ops_hnsw__20241021211541", - tableName: "vectors", - concurrently: true -}); -``` - - - - - ```bash - { - 'message': 'Vector index deletion task queued successfully.', - 'task_id': '8e49efca-606d-422d-b73f-2d9e138661b6' - } - ``` - - - - -## Best Practices and Performance Optimization - -### Vector Index Configuration - -1. **HNSW Parameters:** - - `m`: Higher values (16-64) improve search quality but increase memory - - `ef_construction`: Higher values improve quality but slow construction - - Recommended starting point: `m=16`, `ef_construction=64` - -2. **Distance Measures:** - - `cosine_distance`: Best for normalized vectors (most common) - - `l2_distance`: Better for absolute distances - - `max_inner_product`: Optimized for dot product similarity - -3. **Production Considerations:** - - Always use `concurrently: true` to avoid blocking operations - - Create indexes during off-peak hours - - Pre-warm indices with representative queries - - Monitor memory usage during creation - -### Chunking Strategy - -1. **Size Guidelines:** - - Avoid chunks >1024 characters for retrieval quality - - Keep chunks >64 characters to maintain context - - Use overlap for better context preservation - -2. **Method Selection:** - - Use `by_title` for structured documents - - Use `basic` for uniform text content - - Consider `recursive` for nested content - -## Troubleshooting - -### Common Issues - -1. **Ingestion Failures:** - - Verify file permissions and paths - - Check file format support - - Ensure metadata array length matches files - - Monitor memory for large files - -2. **Vector Index Performance:** - - Check index creation time - - Monitor memory usage - - Verify warm-up queries - - Consider rebuilding if quality degrades - -3. **Chunking Issues:** - - Adjust overlap for context preservation - - Monitor chunk sizes - - Verify language detection - - Check encoding for special characters diff --git a/docs/documentation/js-sdk/introduction.mdx b/docs/documentation/js-sdk/introduction.mdx deleted file mode 100644 index f8842ee69..000000000 --- a/docs/documentation/js-sdk/introduction.mdx +++ /dev/null @@ -1,54 +0,0 @@ ---- -title: 'Introduction' -description: 'Build, scale, and manage user-facing Retrieval-Augmented Generation applications.' ---- - -# R2R JavaScript SDK Documentation - -## Installation - -Before starting, make sure you have completed the [R2R installation](/documentation/installation). - -Install the R2R JavaScript SDK: - -```bash -npm install r2r-js -``` - -## Getting Started - -1. Import the R2R client: - -```javascript -const { r2rClient } = require('r2r-js'); -``` - -2. Initialize the client: - -```javascript -const client = new r2rClient('http://localhost:7272'); -``` - -3. Check if R2R is running correctly: - -```javascript -const healthResponse = await client.health(); -// {"status":"ok"} -``` - -4. 
Login (Optional): -```javascript -// client.register("me@email.com", "my_password"), -// client.verify_email("me@email.com", "my_verification_code") -client.login("me@email.com", "my_password") -``` -When using authentication the commands below automatically restrict the scope to a user's available documents. - -## Additional Documentation - -For more detailed information on specific functionalities of R2R, please refer to the following documentation: - -- [Document Ingestion](/documentation/python-sdk/ingestion): Learn how to add, retrieve, and manage documents in R2R. -- [Search & RAG](/documentation/python-sdk/retrieval): Explore various querying techniques and Retrieval-Augmented Generation capabilities. -- [Authentication](/documentation/python-sdk/auth): Understand how to manage users and implement authentication in R2R. -- [Observability](/documentation/python-sdk/observability): Learn about analytics and monitoring tools for your R2R system. diff --git a/docs/documentation/js-sdk/observability.mdx b/docs/documentation/js-sdk/observability.mdx deleted file mode 100644 index 5da15a321..000000000 --- a/docs/documentation/js-sdk/observability.mdx +++ /dev/null @@ -1,30 +0,0 @@ ---- -title: 'Observability' -description: 'Manage and observe your R2R system.' ---- - -## Analytics and Observability -R2R provides various tools for analytics and observability to help you monitor and improve the performance of your RAG system. - -```javascript -const scoringResponse = await client.login("message_id_here", "password123"); -``` - - - - - The response from the R2R system. - ```javascript - { 'results': 'ok' } - ``` - - - - - - -The unique identifier of the completion message to be scored. Found in logs for a message. - - -The score to assign to the completion, ranging from -1 to 1. - diff --git a/docs/documentation/js-sdk/retrieval.mdx b/docs/documentation/js-sdk/retrieval.mdx deleted file mode 100644 index f3cb50b0d..000000000 --- a/docs/documentation/js-sdk/retrieval.mdx +++ /dev/null @@ -1,771 +0,0 @@ ---- -title: 'Search & RAG' -description: 'Search and Retrieval-Augmented Generation with R2R.' ---- - - -Occasionally this SDK documentation falls out of date, if in doubt, cross-check with the automatcially generated API Reference documentation . - - -## AI Powered Search - -### Search - -Perform a basic vector search: - -```javascript -const searchResponse = await client.search("What was Uber's profit in 2020?"); -``` - - - - - The search results from the R2R system. - ```javascript - { - results: { - vector_search_results: [ - { - id: '7ed3a01c-88dc-5a58-a68b-6e5d9f292df2', - score: 0.780314067545999, - metadata: { - text: 'Content of the chunk...', - title: 'file1.txt', - version: 'v0', - chunk_order: 0, - document_id: 'c9bdbac7-0ea3-5c9e-b590-018bd09b127b', - extraction_id: '472d6921-b4cd-5514-bf62-90b05c9102cb', - // ... - } - } - // ... - ], - kg_search_results: null - } - } - ``` - - - - - - The search query. - - - - Optional settings for vector search, either a dictionary, a `VectorSearchSettings` object, or `None` may be passed. If a dictionary or `None` is passed, then R2R will use server-side defaults for non-specified fields. - - - - Whether to use vector search. - - - - Whether to perform a hybrid search (combining vector and keyword search). 
- - - - Commonly seen filters include operations include the following: - - `{"document_id": {"$eq": "9fbe403b-..."}}` - - `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` - - `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` - - `{"$and": {"$document_id": ..., "collection_ids": ...}}` - - - - Maximum number of results to return (1-1000). - - - - Collection Ids to search for. - - - The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product). - - - - Whether to include search score values in the search results. - - - - Whether to include element metadata in the search results. - - - - Number of ivfflat index lists to query. Higher increases accuracy but decreases speed. - - - - Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed. - - - - - Optional settings for knowledge graph search, either a dictionary, a `KGSearchSettings` object, or `None` may be passed. If a dictionary or `None` is passed, then R2R will use server-side defaults for non-specified fields. - - - - - Alias for `search_filters`, now `deprecated`. - - - - Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. - - Commonly seen filters include operations include the following: - - `{"document_id": {"$eq": "9fbe403b-..."}}` - - `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` - - `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` - - `{"$and": {"$document_id": ..., "collection_ids": ...}}` - - - - - - Collection IDs to search for. - - - - The system prompt for the GraphRAG map prompt. - - - - The system prompt for the GraphRAG reduce prompt. - - - - Whether to use knowledge graph search. - - - - The type of knowledge graph search to perform. Supported value: "local". - - - - The level of knowledge graph search to perform. - - - - Configuration for text generation during graph search. - - - - The maximum length of the community description. - - - - The maximum number of LLM queries to perform during global search. - - - - The local search limits for different entity types. The default values are: - - `"__Entity__"`: 20 - - `"__Relationship__"`: 20 - - `"__Community__"`: 20 - - - - - - -### Search custom settings - -Search with custom settings, such as bespoke document filters and larger search limits -```javascript -# returns only chunks from documents with title `document_title` -const filtered_search_response = client.search( - "What was Uber's profit in 2020?", - { filters: { - $eq: "uber_2021.pdf" - }, - search_limit: 100 - }, -) -``` - - -### Hybrid Search - -Combine traditional keyword-based search with vector search: - -```javascript -const hybrid_search_response = client.search( - "What was Uber's profit in 2020?", - { filters: { - $eq: "uber_2021.pdf" - }, - search_limit: 100 - }, -) -``` - -```javascript -hybrid_search_response = client.search( - "What was Uber's profit in 2020?", - { use_hybrid_search: true} -) -``` - -### Knowledge Graph Search - -Utilize knowledge graph capabilities to enhance search results: - -```javascript -const kg_search_response = client.search( - "What is a fierce nerd?", - { use_kg_search: true }, -) -``` - - - - - The knowledge graph search results from the R2R system. 
- ```python - { - 'results': { - 'vector_search_results': [], - 'kg_search_results': [ - ('MATCH (p:PERSON)-[:FOUNDED]->(c:COMPANY) WHERE c.name = "Airbnb" RETURN p.name', - [{'name': 'Brian Chesky'}, {'name': 'Joe Gebbia'}, {'name': 'Nathan Blecharczyk'}]) - ] - } - } - ``` - - - - -## Retrieval-Augmented Generation (RAG) - -### Basic RAG - -Generate a response using RAG: - -```javascript -const ragResponse = await client.rag("What was Uber's profit in 2020?"); -``` - - - - - The RAG response from the R2R system. - ```python - { - 'results': { - 'completion': { - 'id': 'chatcmpl-9ySStnC0oEhnGPPV1k8ZYnxBKOuW8', - 'choices': [{ - 'finish_reason': 'stop', - 'index': 0, - 'logprobs': None, - 'message': { - 'content': "Uber's profit in 2020 was a net loss of $6.77 billion." - }, - ... - }] - }, - 'search_results': { - 'vector_search_results': [...], - 'kg_search_results': None - } - } - } - ``` - - - - - - The query for RAG. - - - - Optional settings for vector search, either a dictionary, a `VectorSearchSettings` object, or `None` may be passed. If a dictionary is used, non-specified fields will use the server-side default. - - - - Whether to use vector search. - - - - Whether to perform a hybrid search (combining vector and keyword search). - - - - Alias for `search_filters`, now `deprecated`. - - - - Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. - - Commonly seen filters include operations include the following: - - `{"document_id": {"$eq": "9fbe403b-..."}}` - - `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` - - `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` - - `{"$and": {"$document_id": ..., "collection_ids": ...}}` - - - - Maximum number of results to return (1-1000). - - - - Collection Ids to search for. - - - - The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product). - - - - Whether to include search score values in the search results. - - - - Whether to include element metadata in the search results. - - - - Number of ivfflat index lists to query. Higher increases accuracy but decreases speed. - - - - Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed. - - - - Settings for hybrid search. - - - Weight to apply to full text search. - - - - Weight to apply to semantic search. - - - - Maximum number of results to return from full text search. - - - - K-value for RRF (Rank Reciprocal Fusion). - - - - - - - - Optional settings for knowledge graph search, either a dictionary, a `KGSearchSettings` object, or `None` may be passed. If a dictionary or `None` is passed, then R2R will use server-side defaults for non-specified fields. - - - The `KGSearchSettings` class allows you to configure the knowledge graph search settings for your R2R system. Here are the available options: - - - Alias for `search_filters`, now `deprecated`. - - - - Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. - - Commonly seen filters include operations include the following: - - `{"document_id": {"$eq": "9fbe403b-..."}}` - - `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` - - `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` - - `{"$and": {"$document_id": ..., "collection_ids": ...}}` - - - - Collection IDs to search for. - - - - The system prompt for the GraphRAG map prompt. 
- - - - The system prompt for the GraphRAG reduce prompt. - - - - Whether to use knowledge graph search. - - - - The type of knowledge graph search to perform. Supported value: "local". - - - - The level of knowledge graph search to perform. - - - - Configuration for text generation during graph search. - - - - The maximum length of the community description. - - - - The maximum number of LLM queries to perform during global search. - - - - The local search limits for different entity types. The default values are: - - `"__Entity__"`: 20 - - `"__Relationship__"`: 20 - - `"__Community__"`: 20 - - - - - - - - Optional configuration for LLM to use during RAG generation, including model selection and parameters. Will default to values specified in `r2r.toml`. - - - Model used in final LLM completion. - - - - Temperature used in final LLM completion. - - - - The `top_p` used in final LLM completion. - - - - The `max_tokens_to_sample` used in final LLM completion. - - - - The `functions` used in final LLM completion. - - - - The `tools` used in final LLM completion. - - - - The `api_base` used in final LLM completion. - - - - The `functions` used in final LLM completion. - - - - The `tools` used in final LLM completion. - - - - The `api_base` used in final LLM completion. - - - - - - - - Optional custom prompt to override the default task prompt. - - - - Augment document chunks with their respective document titles? - - -### RAG with custom search settings - -Use hybrid search in RAG: - -```javascript -const hybridRagResponse = await client.rag({ - query: "Who is Jon Snow?", - use_hybrid_search: true -}); -``` - -### RAG with custom completion LLM - -Use a different LLM model for RAG: - -```javascript -const customLLMRagResponse = await client.rag({ - query: "What is R2R?", - rag_generation_config: { - model: "anthropic/claude-3-opus-20240229" - } -}); -``` - -### Streaming RAG - -Stream RAG responses for real-time applications: - -```javascript -const streamResponse = await client.rag({ - query: "Who was Aristotle?", - rag_generation_config: { stream: true } -}); - -if (streamResponse instanceof ReadableStream) { - const reader = streamResponse.getReader(); - while (true) { - const { done, value } = await reader.read(); - if (done) break; - console.log(new TextDecoder().decode(value)); - } -} -``` -### Advanced RAG Techniques - -R2R supports advanced Retrieval-Augmented Generation (RAG) techniques that can be easily configured at runtime. These techniques include Hypothetical Document Embeddings (HyDE) and RAG-Fusion, which can significantly enhance the quality and relevance of retrieved information. - -To use an advanced RAG technique, you can specify the `search_strategy` parameter in your vector search settings: - -```javascript -const client = new R2RClient(); - -// Using HyDE -const hydeResponse = await client.rag( - "What are the main themes in Shakespeare's plays?", - { - vector_search_settings: { - search_strategy: "hyde", - search_limit: 10 - } - } -); - -// Using RAG-Fusion -const ragFusionResponse = await client.rag( - "Explain the theory of relativity", - { - vector_search_settings: { - search_strategy: "rag_fusion", - search_limit: 20 - } - } -); -``` - -For a comprehensive guide on implementing and optimizing advanced RAG techniques in R2R, including HyDE and RAG-Fusion, please refer to our [Advanced RAG Cookbook](/cookbooks/advanced-rag). 
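These strategies live in the same `vector_search_settings` object as the options described earlier, so they can in principle be combined with settings such as hybrid search. A sketch (whether the two settings compose this way is an assumption; verify against the Advanced RAG Cookbook linked above):

```javascript
// Sketch (assumption): HyDE retrieval combined with hybrid (vector + keyword) search.
const hydeHybridResponse = await client.rag(
  "What are the main themes in Shakespeare's plays?",
  {
    vector_search_settings: {
      search_strategy: "hyde",
      use_hybrid_search: true,
      search_limit: 10
    }
  }
);
```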
- - -### Customizing RAG - -Putting everything together for highly custom RAG functionality: - -```javascript -const customRagResponse = await client.rag({ - query: "Who was Aristotle?", - use_vector_search: true, - use_hybrid_search: true, - use_kg_search: true, - kg_generation_config: {}, - rag_generation_config: { - model: "anthropic/claude-3-haiku-20240307", - temperature: 0.7, - stream: true - } -}); -``` - -## Agents - -### Multi-turn agentic RAG -The R2R application includes agents which come equipped with a search tool, enabling them to perform RAG. Using the R2R Agent for multi-turn conversations: - -```javascript -const messages = [ - { role: "user", content: "What was Aristotle's main contribution to philosophy?" }, - { role: "assistant", content: "Aristotle made numerous significant contributions to philosophy, but one of his main contributions was in the field of logic and reasoning. He developed a system of formal logic, which is considered the first comprehensive system of its kind in Western philosophy. This system, often referred to as Aristotelian logic or term logic, provided a framework for deductive reasoning and laid the groundwork for scientific thinking." }, - { role: "user", content: "Can you elaborate on how this influenced later thinkers?" } -]; - -const agentResponse = await client.agent({ - messages, - use_vector_search: true, - use_hybrid_search: true -}); -``` - -Note that any of the customization seen in AI powered search and RAG documentation above can be applied here. - - - - - The agent endpoint will return the entire conversation as a response, including internal tool calls. - ```javascript - { - results: [ - { - role: 'system', - content: '## You are a helpful agent that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.\n', - name: null, - function_call: null, - tool_calls: null - }, - { - role: 'user', - content: "What was Aristotle's main contribution to philosophy?", - name: null, - function_call: null, - tool_calls: null - }, - { - role: 'assistant', - content: "Aristotle made numerous significant contributions to philosophy, but one of his main contributions was in the field of logic and reasoning. He developed a system of formal logic, which is considered the first comprehensive system of its kind in Western philosophy. This system, often referred to as Aristotelian logic or term logic, provided a framework for deductive reasoning and laid the groundwork for scientific thinking.", - name: null, - function_call: null, - tool_calls: null - }, - { - role: 'user', - content: 'Can you elaborate on how this influenced later thinkers?', - name: null, - function_call: null, - tool_calls: null - }, - { - role: 'assistant', - content: null, - name: null, - function_call: { - name: 'search', - arguments: '{"query":"Aristotle\'s influence on later thinkers in philosophy"}' - }, - tool_calls: null - }, - { - role: 'function', - content: '1. 
ormation: List of writers influenced by Aristotle More than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. According to the philosopher Bryan Magee, "it is doubtful whether any human being has ever known as much as he did".[145]\n2. subject of contemporary philosophical discussion. Aristotle\'s views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition\n3. the scholastic tradition of the Catholic Church. Aristotle was revered among medieval Muslim scholars as "The First Teacher", and among medieval Christians like Thomas Aquinas as simply "The Philosopher", while the poet Dante called him "the master of those who know". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle\'s influence on logic continued well into the 19th century. In addition, his ethics, although\n4. hilosophy\nFurther information: Peripatetic school The immediate influence of Aristotle\'s work was felt as the Lyceum grew into the Peripatetic school. Aristotle\'s students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle\'s influence over Alexander the Great is seen in the latter\'s bringing with him on his expedition a host of zoologists, botanists, and researchers. He had also learned a great deal\n5. scholastic philosophers. Alkindus greatly admired Aristotle\'s philosophy,[168] and Averroes spoke of Aristotle as the "exemplar" for all future philosophers.[169] Medieval Muslim scholars regularly described Aristotle as the "First Teacher".[167] The title was later used by Western philosophers (as in the famous poem of Dante) who were influenced by the tradition of Islamic philosophy.[170]\n\nMedieval Europe\nFurther information: Aristotelianism and Syllogism § Medieval\n6. those by James of Venice and William of Moerbeke. After the Scholastic Thomas Aquinas wrote his Summa Theologica, working from Moerbeke\'s translations and calling Aristotle "The Philosopher",[172] the demand for Aristotle\'s writings grew, and the Greek manuscripts returned to the West, stimulating a revival of Aristotelianism in Europe that continued into the Renaissance.[173] These thinkers blended Aristotelian philosophy with Christianity, bringing the thought of Ancient Greece into the Middle Ages.', - name: 'search', - function_call: null, - tool_calls: null - }, - { - role: 'assistant', - content: "Aristotle's contributions to philosophy, particularly his development of formal logic, had a profound influence on later thinkers across various cultures and eras. Here are some key ways in which his work influenced subsequent intellectual traditions:\n\n1. 
Medieval Islamic Philosophy: Aristotle was highly revered among medieval Muslim scholars, who referred to him as \"The First Teacher.\" His works were extensively studied and commented upon by philosophers such as Averroes and Avicenna. These scholars played a crucial role in preserving and expanding upon Aristotle's ideas, which later influenced Western thought [5].\n\n2. Christian Scholasticism: In medieval Europe, Aristotle's works were integrated into Christian theology, particularly through the efforts of Thomas Aquinas, who referred to Aristotle as \"The Philosopher.\" Aquinas' synthesis of Aristotelian philosophy with Christian doctrine became a cornerstone of Scholasticism, a dominant intellectual tradition in medieval Europe [3][6].\n\n3. Renaissance and Enlightenment: Aristotle's influence persisted into the Renaissance, where his works were revived and studied extensively. This period saw a renewed interest in classical texts, and Aristotle's ideas continued to shape scientific and philosophical inquiry until the Enlightenment, when new scientific paradigms began to emerge [2][6].\n\n4. Development of Logic: Aristotle's system of formal logic remained the standard for centuries and was studied by medieval scholars such as Peter Abelard and Jean Buridan. His influence on logic extended well into the 19th century, shaping the development of this field [3].\n\n5. Peripatetic School: Aristotle's immediate influence was also felt through the Peripatetic school, which he founded. His students, including Theophrastus, carried on his work and further developed his ideas, ensuring that his intellectual legacy continued [4].\n\nOverall, Aristotle's contributions laid the groundwork for many fields of study and influenced a wide range of thinkers, making him one of the most significant figures in the history of Western philosophy and beyond.", - name: null, - function_call: null, - tool_calls: null - } - ] - } - ``` - - - - - - The array of messages to pass to the RAG agent. - - - - Whether to use vector search. - - - - Optional filters for the search. - - - - The maximum number of search results to return. - - - - Whether to perform a hybrid search (combining vector and keyword search). - - - - Whether to use knowledge graph search. - - - - Optional configuration for knowledge graph search generation. - - - - Optional configuration for RAG generation, including model selection and parameters. - - - - Optional custom prompt to override the default task prompt. - - - - Whether to include document titles in the context if available. - - -### Multi-turn agentic RAG with streaming - -The response from the RAG agent may be streamed directly back: - -```javascript -const messages = [ - { role: "user", content: "What was Aristotle's main contribution to philosophy?" }, - { role: "assistant", content: "Aristotle made numerous significant contributions to philosophy, but one of his main contributions was in the field of logic and reasoning. He developed a system of formal logic, which is considered the first comprehensive system of its kind in Western philosophy. This system, often referred to as Aristotelian logic or term logic, provided a framework for deductive reasoning and laid the groundwork for scientific thinking." }, - { role: "user", content: "Can you elaborate on how this influenced later thinkers?" 
} -]; - -const agentResponse = await client.agent({ - messages, - use_vector_search: true, - use_hybrid_search: true, - rag_generation_config: { stream: true } -}); - -if (agentResponse instanceof ReadableStream) { - const reader = agentResponse.getReader(); - while (true) { - const { done, value } = await reader.read(); - if (done) break; - console.log(new TextDecoder().decode(value)); - } -} -``` - - - - - The agent endpoint will stream back its response, including internal tool calls. - ```javascript - search{"query":"Aristotle's influence on later thinkers in philosophy"}"{"id":"b234931e-0cfb-5644-8f23-560a3097f5fe","score":1.0,"metadata":{"text":"ormation: List of writers influenced by Aristotle More than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. According to the philosopher Bryan Magee, \"it is doubtful whether any human being has ever known as much as he did\".[145]","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"1827ac2c-2a06-5bc2-ad29-aa14b4d99540","score":1.0,"metadata":{"text":"subject of contemporary philosophical discussion. Aristotle's views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"94718936-ea92-5e29-a5ee-d4a6bc037384","score":1.0,"metadata":{"text":"the scholastic tradition of the Catholic Church. Aristotle was revered among medieval Muslim scholars as \"The First Teacher\", and among medieval Christians like Thomas Aquinas as simply \"The Philosopher\", while the poet Dante called him \"the master of those who know\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle's influence on logic continued well into the 19th century. In addition, his ethics, although","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"16483f14-f8a2-5c5c-8fcd-1bcbbd6603e4","score":1.0,"metadata":{"text":"hilosophy\nFurther information: Peripatetic school The immediate influence of Aristotle's work was felt as the Lyceum grew into the Peripatetic school. Aristotle's students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. 
Aristotle's influence over Alexander the Great is seen in the latter's bringing with him on his expedition a host of zoologists, botanists, and researchers. He had also learned a great deal","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"26eb20ee-a203-5ad5-beaa-511cc526aa6e","score":1.0,"metadata":{"text":"scholastic philosophers. Alkindus greatly admired Aristotle's philosophy,[168] and Averroes spoke of Aristotle as the \"exemplar\" for all future philosophers.[169] Medieval Muslim scholars regularly described Aristotle as the \"First Teacher\".[167] The title was later used by Western philosophers (as in the famous poem of Dante) who were influenced by the tradition of Islamic philosophy.[170]\n\nMedieval Europe\nFurther information: Aristotelianism and Syllogism § Medieval","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}","{"id":"a08fd1b4-4e6f-5487-9af6-df2f6cfe1048","score":1.0,"metadata":{"text":"those by James of Venice and William of Moerbeke. After the Scholastic Thomas Aquinas wrote his Summa Theologica, working from Moerbeke's translations and calling Aristotle \"The Philosopher\",[172] the demand for Aristotle's writings grew, and the Greek manuscripts returned to the West, stimulating a revival of Aristotelianism in Europe that continued into the Renaissance.[173] These thinkers blended Aristotelian philosophy with Christianity, bringing the thought of Ancient Greece into the Middle Ages.","title":"aristotle.txt","user_id":"2acb499e-8428-543b-bd85-0d9098718220","document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","extraction_id":"69431c4a-30cf-504f-8fab-7dcfc7580c63","associatedQuery":"Aristotle's influence on later thinkers in philosophy"}}"Aristotle's contributions to philosophy, particularly his development of formal logic, had a profound influence on later thinkers across various cultures and eras. Here are some key ways in which his work influenced subsequent intellectual traditions: - -1. Medieval Islamic Philosophy: Aristotle was highly revered among medieval Muslim scholars, who referred to him as "The First Teacher." His works were extensively translated into Arabic and studied by philosophers such as Averroes and Avicenna. These scholars not only preserved Aristotle's works but also expanded upon them, influencing both Islamic and Western thought. - -2. Christian Scholasticism: In medieval Europe, Aristotle's works were integrated into Christian theology, particularly through the efforts of Thomas Aquinas, who referred to Aristotle as "The Philosopher." Aquinas's synthesis of Aristotelian philosophy with Christian doctrine became a cornerstone of Scholasticism, a dominant intellectual tradition in medieval Europe. - -3. Renaissance and Enlightenment: Aristotle's influence persisted into the Renaissance and Enlightenment periods. His works on logic, ethics, and natural sciences were foundational texts for scholars during these eras. The revival of Aristotelianism during the Renaissance helped bridge the gap between ancient Greek philosophy and modern scientific thought. - -4. 
Development of Logic: Aristotle's system of formal logic remained the standard for centuries and was studied by medieval scholars such as Peter Abelard and Jean Buridan. His influence on logic extended well into the 19th century, shaping the development of this field. - -5. Peripatetic School: Aristotle's immediate influence was felt through the Peripatetic school, which he founded. His students, including Theophrastus, continued to develop and disseminate his ideas, ensuring that his philosophical legacy endured. - -Overall, Aristotle's contributions to logic, ethics, natural sciences, and metaphysics created a foundation upon which much of Western intellectual tradition was built. His work influenced a wide range of fields and thinkers, making him one of the most pivotal figures in the history of philosophy. - ``` - - - diff --git a/docs/documentation/local-rag.mdx b/docs/documentation/local-rag.mdx deleted file mode 100644 index 2de6674da..000000000 --- a/docs/documentation/local-rag.mdx +++ /dev/null @@ -1,77 +0,0 @@ ---- -title: 'Local LLMs' -description: 'Learn how to run a Retrieval-Augmented Generation system locally using R2R' -icon: 'house' ---- - -## Introduction - -To run R2R with default local LLM settings, execute `r2r serve --docker --config-name=local_llm`. - -R2R supports RAG with local LLMs through the Ollama library. You may follow the instructions on their [official website](https://ollama.com/) to install Ollama outside of the R2R Docker. - -## Preparing Local LLMs - -Next, make sure that you have all the necessary LLMs installed: -```bash -# in a separate terminal -ollama pull llama3.1 -ollama pull mxbai-embed-large -ollama serve -``` - -These commands will need to be replaced with models specific to your configuration when deploying R2R with a customized configuration. - -## Configuration - -R2R uses a TOML configuration file for managing settings, which you can [read about here](/documentation/configuration/introduction). For local setup, we'll use the default `local_llm` configuration. This can be customized to your needs by setting up a standalone project. - - - - - -The `local_llm` configuration file (`core/configs/local_llm.toml`) includes: - -```toml -[completion] -provider = "litellm" -concurrent_request_limit = 1 - - [completion.generation_config] - model = "ollama/llama3.1" - temperature = 0.1 - top_p = 1 - max_tokens_to_sample = 1_024 - stream = false - add_generation_kwargs = { } - -[database] -provider = "postgres" - -[embedding] -provider = "ollama" -base_model = "mxbai-embed-large" -base_dimension = 1_024 -batch_size = 32 -add_title_as_prefix = true -concurrent_request_limit = 32 - -[ingestion] -excluded_parsers = [ "mp4" ] -``` - -This configuration uses `ollama` and the model `mxbai-embed-large` to run embeddings. We have excluded media file parsers as they are not yet supported locally. - - -We are still working on adding local multimodal RAG features. Your feedback would be appreciated. - - - - - - -For more information on how to configure R2R, [visit here](/documentation/configuration/introduction). - -## Summary - -The above steps are all you need to get RAG up and running with local LLMs in R2R. For detailed setup and basic functionality, refer back to the [R2R Quickstart]((/documentation/quickstart/introduction). For more advanced usage and customization options, refer to the [basic configuration]((/documentation/configuration/introduction) or join the [R2R Discord community](https://discord.gg/p6KqD2kjtB). 
diff --git a/docs/documentation/python-sdk/auth.mdx b/docs/documentation/python-sdk/auth.mdx deleted file mode 100644 index 96b48d707..000000000 --- a/docs/documentation/python-sdk/auth.mdx +++ /dev/null @@ -1,336 +0,0 @@ ---- -title: 'Authentication' -description: 'Manage users in R2R with built-in authentication' ---- - - -Occasionally this SDK documentation falls out of date, cross-check with the automatcially generated API Reference documentation for the latest parameters. - - - -## User Authentication and Management - -R2R provides a comprehensive set of user authentication and management features, allowing you to implement secure and feature-rich authentication systems in your applications. - -### User Registration - -To register a new user: - -```python -register_response = client.register("user@example.com", "password123") -``` - - - - - ```python - { - 'results': { - 'email': 'user@example.com', - 'id': 'bf417057-f104-4e75-8579-c74d26fcbed3', - 'hashed_password': '$2b$12$p6a9glpAQaq.4uzi4gXQru6PN7WBpky/xMeYK9LShEe4ygBf1L.pK', - 'is_superuser': False, - 'is_active': True, - 'is_verified': False, - 'verification_code_expiry': None, - 'name': None, - 'bio': None, - 'profile_picture': None, - 'created_at': '2024-07-16T22:53:47.524794Z', - 'updated_at': '2024-07-16T22:53:47.524794Z' - } - } - ``` - - - - -### Email Verification - -If email verification is enabled, verify a user's email: - -```python -verify_response = client.verify_email("verification_code_here") -``` - - - - - ```python - { - "results": { - "message": "Email verified successfully" - } - } - ``` - - - - -### User Login - -To log in and obtain access tokens: - -```python -login_response = client.login("user@example.com", "password123") -``` - - - - - ```python - { - 'results': { - 'access_token': { - 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', - 'token_type': 'access' - }, - 'refresh_token': { - 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', - 'token_type': 'refresh' - } - } - } - ``` - - - - -### Get Current User Info - -Retrieve information about the currently authenticated user: - -```python -user_info = client.user() -``` - - - - - ```python - { - 'results': { - 'email': 'user@example.com', - 'id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', - 'hashed_password': 'null', - 'is_superuser': False, - 'is_active': True, - 'is_verified': True, - 'verification_code_expiry': None, - 'name': None, - 'bio': None, - 'profile_picture': None, - 'created_at': '2024-07-16T23:06:42.123303Z', - 'updated_at': '2024-07-16T23:22:48.256239Z' - } - } - ``` - - - - -### Refresh Access Token - -Refresh an expired access token: - -```python -refresh_response = client.refresh_access_token() -``` - - - - - ```python - { - 'results': { - 'access_token': { - 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', - 'token_type': 'access' - }, - 'refresh_token': { - 'token': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9...', - 'token_type': 'refresh' - } - } - } - ``` - - - - -### Change Password - -Change the user's password: - -```python -change_password_result = client.change_password("password123", "new_password") -``` - - - - - ```python - { - "result": { - "message": "Password changed successfully" - } - } - ``` - - - - -### Request Password Reset - -Request a password reset for a user: - -```python -reset_request_result = client.request_password_reset("user@example.com") -``` - - - - - ```python - { - "result": { - "message": "If the email exists, a reset link has been sent" - } - } - ``` - - - - -### Confirm Password Reset - -Confirm a 
password reset using the reset token: - -```python -reset_confirm_result = client.confirm_password_reset("reset_token_here", "new_password") -``` - - - - - ```python - { - "result": { - "message": "Password reset successfully" - } - } - ``` - - - - -### Update User Profile - -Update the user's profile information: - -```python -update_result = client.update_user(name="John Doe", bio="R2R enthusiast") -``` - - - - - ```python - { - 'results': { - 'email': 'user@example.com', - 'id': '76eea168-9f98-4672-af3b-2c26ec92d7f8', - 'hashed_password': 'null', - 'is_superuser': False, - 'is_active': True, - 'is_verified': True, - 'verification_code_expiry': None, - 'name': 'John Doe', - 'bio': 'R2R enthusiast', - 'profile_picture': None, - 'created_at': '2024-07-16T23:06:42.123303Z', - 'updated_at': '2024-07-16T23:22:48.256239Z' - } - } - ``` - - - - -### Delete User Account - -Delete the user's account: - -```python -user_id = register_response["results"]["id"] # input unique id here -delete_result = client.delete_user(user_id, "password123") -``` - - - - - ```python - { - 'results': { - 'message': 'User account deleted successfully' - } - } - ``` - - - - -### User Logout - -Log out and invalidate the current access token: - -```python -logout_response = client.logout() -``` - - - - - ```python - { - 'results': { - 'message': 'Logged out successfully' - } - } - ``` - - - - -### Superuser Capabilities - -Superusers have additional privileges, including access to system-wide operations and sensitive information. To use superuser capabilities, authenticate as a superuser or the default admin: - -```python -# Login as admin -login_result = client.login("admin@example.com", "admin_password") - -# Access superuser features -users_overview = client.users_overview() -logs = client.logs() -analytics_result = client.analytics( - {"all_latencies": "search_latency"}, - {"search_latencies": ["basic_statistics", "search_latency"]} -) -``` - - -Superuser actions should be performed with caution and only by authorized personnel. Ensure proper security measures are in place when using superuser capabilities. - - -## Security Considerations - -When implementing user authentication, consider the following best practices: - -1. Always use HTTPS in production to encrypt data in transit. -2. Implement rate limiting to protect against brute-force attacks. -3. Use secure password hashing (R2R uses bcrypt by default). -4. Consider implementing multi-factor authentication (MFA) for enhanced security. -5. Conduct regular security audits of your authentication system. - -For more advanced use cases or custom implementations, refer to the R2R documentation or reach out to the community for support. diff --git a/docs/documentation/python-sdk/collections.mdx b/docs/documentation/python-sdk/collections.mdx deleted file mode 100644 index 8d0082efe..000000000 --- a/docs/documentation/python-sdk/collections.mdx +++ /dev/null @@ -1,423 +0,0 @@ ---- -title: 'Collection Management' -description: 'Manage collections in R2R' ---- - - -Occasionally this SDK documentation falls out of date, cross-check with the automatcially generated API Reference documentation for the latest parameters. - - - -A collection in R2R is a logical grouping of users and documents that allows for efficient access control and organization. Collections enable you to manage permissions and access to documents at a collection level, rather than individually. 
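Before walking through each method, here is a condensed sketch of the typical flow — create a collection, then grant a user and a document access through it — using the calls detailed in the sections below (the IDs are the same placeholder values used in those sections):

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

# Create a collection and capture its ID
collection = client.create_collection(
    "Marketing Team", "Collection for marketing department"
)
collection_id = collection["results"]["collection_id"]

# Grant access at the collection level rather than per user/document
client.add_user_to_collection("456e789f-g01h-34i5-j678-901234567890", collection_id)
client.assign_document_to_collection("789g012j-k34l-56m7-n890-123456789012", collection_id)
```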
- -R2R provides a comprehensive set of collection features, allowing you to implement efficient access control and organization of users and documents in your applications. - - - -Collection permissioning in R2R is still under development and as a result the API will likely evolve. - - - -## Collection creation and Management - -### Create a Collection - -Create a new collection with a name and optional description: - -```python -create_collection_response = client.create_collection("Marketing Team", "Collection for marketing department") -collection_id = create_collection_response["results"]["collection_id] # '123e4567-e89b-12d3-a456-426614174000' -``` - - - - - ```python - { - 'results': { - 'collection_id': '123e4567-e89b-12d3-a456-426614174000', - 'name': 'Marketing Team', - 'description': 'Collection for marketing department', - 'created_at': '2024-07-16T22:53:47.524794Z', - 'updated_at': '2024-07-16T22:53:47.524794Z' - } - } - ``` - - - - -### Get Collection details - -Retrieve details about a specific collection: - -```python -collection_details = client.get_collection(collection_id) -``` - - - - - ```python - { - 'results': { - 'collection_id': '123e4567-e89b-12d3-a456-426614174000', - 'name': 'Marketing Team', - 'description': 'Collection for marketing department', - 'created_at': '2024-07-16T22:53:47.524794Z', - 'updated_at': '2024-07-16T22:53:47.524794Z' - } - } - ``` - - - - -### Update a Collection - -Update a collection's name or description: - -```python -update_result = client.update_collection( - collection_id, - name="Updated Marketing Team", - description="New description for marketing team" -) -``` - - - - - ```python - { - 'results': { - 'collection_id': '123e4567-e89b-12d3-a456-426614174000', - 'name': 'Updated Marketing Team', - 'description': 'New description for marketing team', - 'created_at': '2024-07-16T22:53:47.524794Z', - 'updated_at': '2024-07-16T23:15:30.123456Z' - } - } - ``` - - - - -### List Collections - -Get a list of all collections: - -```python -collections_list = client.list_collections() -``` - - - - - ```python - { - 'results': [ - { - 'collection_id': '123e4567-e89b-12d3-a456-426614174000', - 'name': 'Updated Marketing Team', - 'description': 'New description for marketing team', - 'created_at': '2024-07-16T22:53:47.524794Z', - 'updated_at': '2024-07-16T23:15:30.123456Z' - }, - # ... other collections ... - ] - } - ``` - - - - -## User Management in Collections - -### Add User to Collection - -Add a user to a collection: - -```python - -user_id = '456e789f-g01h-34i5-j678-901234567890' # This should be a valid user ID -add_user_result = client.add_user_to_collection(user_id, collection_id) -``` - - - - - ```python - { - 'results': { - 'message': 'User successfully added to the collection' - } - } - ``` - - - - -### Remove User from Collection - -Remove a user from a collection: - -```python -remove_user_result = client.remove_user_from_collection(user_id, collection_id) -``` - - - - - ```python - { - 'results': { - 'message': 'User successfully removed from the collection' - } - } - ``` - - - - -### List Users in Collection - -Get a list of all users in a specific collection: - -```python -users_in_collection = client.get_users_in_collection(collection_id) -``` - - - - - ```python - { - 'results': [ - { - 'user_id': '456e789f-g01h-34i5-j678-901234567890', - 'email': 'user@example.com', - 'name': 'John Doe', - # ... other user details ... - }, - # ... other users ... 
- ] - } - ``` - - - - -### Get User's Collections - -Get all collections that a user is a member of: - -```python -user_collections = client.user_collections(user_id) -``` - - - - - ```python - { - 'results': [ - { - 'collection_id': '123e4567-e89b-12d3-a456-426614174000', - 'name': 'Updated Marketing Team', - # ... other Collection details ... - }, - # ... other collections ... - ] - } - ``` - - - - -## Document Management in Collections - -### Assign Document to Collection - -Assign a document to a collection: - -```python -document_id = '789g012j-k34l-56m7-n890-123456789012' # must be a valid document id -assign_doc_result = client.assign_document_to_collection(document_id, collection_id) -``` - - - - - ```python - { - 'results': { - 'message': 'Document successfully assigned to the collection' - } - } - ``` - - - - -### Remove Document from Collection - -Remove a document from a collection: - -```python -remove_doc_result = client.remove_document_from_collection(document_id, collection_id) -``` - - - - - ```python - { - 'results': { - 'message': 'Document successfully removed from the collection' - } - } - ``` - - - - -### List Documents in Collection - -Get a list of all documents in a specific collection: - -```python -docs_in_collection = client.documents_in_collection(collection_id) -``` - - - - - ```python - { - 'results': [ - { - 'document_id': '789g012j-k34l-56m7-n890-123456789012', - 'title': 'Marketing Strategy 2024', - # ... other document details ... - }, - # ... other documents ... - ] - } - ``` - - - - -### Get Document's Collections - -Get all collections that a document is assigned to: - -```python -document_collections = client.document_collections(document_id) -``` - - - - - ```python - { - 'results': [ - { - 'collection_id': '123e4567-e89b-12d3-a456-426614174000', - 'name': 'Updated Marketing Team', - # ... other Collection details ... - }, - # ... other collections ... - ] - } - ``` - - - - -## Advanced Collection Management - -### Collection Overview - -Get an overview of collections, including user and document counts: - -```python -collections_overview = client.collections_overview() -``` - - - - - ```python - { - 'results': [ - { - 'collection_id': '123e4567-e89b-12d3-a456-426614174000', - 'name': 'Updated Marketing Team', - 'description': 'New description for marketing team', - 'user_count': 5, - 'document_count': 10, - 'created_at': '2024-07-16T22:53:47.524794Z', - 'updated_at': '2024-07-16T23:15:30.123456Z' - }, - # ... other collections ... - ] - } - ``` - - - - -### Delete a Collection - -Delete a collection: - -```python -delete_result = client.delete_collection(collection_id) -``` - - - - - ```python - { - 'results': { - 'message': 'Collection successfully deleted' - } - } - ``` - - - - -## Pagination and Filtering - -Many collection-related methods support pagination and filtering: - -```python -# List collections with pagination -paginated_collection = client.list_collections(offset=10, limit=20) - -# Get users in a collection with pagination -paginated_users = client.get_users_in_collection(collection_id, offset=5, limit=10) - -# Get documents in a collection with pagination -paginated_docs = client.documents_in_collection(collection_id, offset=0, limit=50) - -# Get collections overview with specific collection IDs -specific_collections_overview = client.collections_overview(collection_ids=['id1', 'id2', 'id3']) -``` - -## Security Considerations - -When implementing collection permissions, consider the following security best practices: - -1. 
Always use HTTPS in production to encrypt data in transit. -2. Implement the principle of least privilege by assigning the minimum necessary permissions to users and collections. -3. Regularly audit collection memberships and document assignments. -4. Ensure that only authorized users (e.g., admins) can perform collection management operations. -5. Implement comprehensive logging for all collection-related actions. -6. Consider implementing additional access controls or custom roles within your application logic for more fine-grained permissions. - -For more advanced use cases or custom implementations, refer to the R2R documentation or reach out to the community for support. diff --git a/docs/documentation/python-sdk/conversations.mdx b/docs/documentation/python-sdk/conversations.mdx deleted file mode 100644 index 7d2571853..000000000 --- a/docs/documentation/python-sdk/conversations.mdx +++ /dev/null @@ -1,193 +0,0 @@ ---- -title: 'Conversations' -description: 'Managing conversations with R2R.' ---- - - -This feature is currently in beta. Functionality may change, and we value your feedback around these features. - - - -Occasionally this SDK documentation falls out of date, cross-check with the automatically generated API Reference documentation for the latest parameters. - - -## Conversation Management - -### Get Conversations Overview - -Retrieve an overview of existing conversations: - -```python -conversation_ids = ['123e4567-e89b-12d3-a456-426614174000', '987f6543-e21b-12d3-a456-426614174000'] -offset = 0 -limit = 10 -overview_response = await client.conversations_overview(conversation_ids, offset, limit) -``` - - - - - The response containing an overview of conversations. - - - - - - Optional list of conversation UUIDs to retrieve. - - - - The offset to start listing conversations from. - - - - The maximum number of conversations to return. - - -### Get Conversation - -Fetch a specific conversation by its UUID: - -```python -conversation_id = '123e4567-e89b-12d3-a456-426614174000' -branch_id = 'optional-branch-id' -conversation = await client.get_conversation(conversation_id, branch_id) -``` - - - - - The response containing the requested conversation details. - - - - - - The UUID of the conversation to retrieve. - - - - Optional ID of a specific branch to retrieve. - - -### Create Conversation - -Create a new conversation: - -```python -new_conversation = await client.create_conversation() -``` - - - - - The response containing details of the newly created conversation. - - - - -### Add Message - -Add a message to an existing conversation: - -```python -conversation_id = '123e4567-e89b-12d3-a456-426614174000' -message = Message(text='Hello, world!') -parent_id = '98765432-e21b-12d3-a456-426614174000' -metadata = {'key': 'value'} - -add_message_response = await client.add_message(conversation_id, message, parent_id, metadata) -``` - - - - - The response after adding the message to the conversation. - - - - - - The UUID of the conversation to add the message to. - - - - The message object to add to the conversation. - - - - An optional UUID of the parent message. - - - - An optional metadata dictionary for the message. - - -### Update Message - -Update an existing message in a conversation: - -```python -message_id = '98765432-e21b-12d3-a456-426614174000' -updated_message = Message(text='Updated message content') - -update_message_response = await client.update_message(message_id, updated_message) -``` - - - - - The response after updating the message. 
- - - - - - The UUID of the message to update. - - - - The updated message object. - - -### Get Branches Overview - -Retrieve an overview of branches in a conversation: - -```python -conversation_id = '123e4567-e89b-12d3-a456-426614174000' -branches_overview = await client.branches_overview(conversation_id) -``` - - - - - The response containing an overview of branches in the conversation. - - - - - - The UUID of the conversation to get branches for. - - -### Delete Conversation - -Delete a conversation by its UUID: - -```python -conversation_id = '123e4567-e89b-12d3-a456-426614174000' -delete_response = await client.delete_conversation(conversation_id) -``` - - - - - The response after deleting the conversation. - - - - - - The UUID of the conversation to delete. - diff --git a/docs/documentation/python-sdk/graphrag.mdx b/docs/documentation/python-sdk/graphrag.mdx deleted file mode 100644 index f3a69f6af..000000000 --- a/docs/documentation/python-sdk/graphrag.mdx +++ /dev/null @@ -1,556 +0,0 @@ ---- -title: 'Knowledge Graphs' -description: 'Creating a knowledge graph and running graphrag using R2R.' ---- - -## Create a graph - -Creating a graph on your documents. - - -```python -client.create_graph( - collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', # optional - run_type="run", # estimate or run - kg_creation_settings={ - "force_kg_creation": True, - "kg_triples_extraction_prompt": "graphrag_triples_extraction_few_shot", - "entity_types": [], - "relation_types": [], - "extraction_merge_count": 4, - "max_knowledge_triples": 100, - "max_description_input_length": 65536, - "generation_config": { - "model": "openai/gpt-4o-mini", - # other generation config params - } - } -) -``` - - - - - The response from the R2R system after creating the graph. - ```bash - {'message': 'Graph creation task queued successfully.', 'task_id': '6e27dfca-606d-422d-b73f-2d9e138661b4'} - ``` - - - - - - The ID of the collection to create the graph for. If not provided, the graph will be created for the default collection. - - - - The type of run to perform. Options are "estimate" or "run". Estimate will return an estimate of the creation cost, and run will create the graph. - - - - - The settings for the graph creation process. - - - The prompt to use for triples extraction. - - - The entity types to extract. If not provided, all entity types will be extracted. - - - The relation types to extract. If not provided, all relation types will be extracted. - - - The number of chunks to merge into a single extraction. - - - The maximum number of triples to extract from each chunk. - - - The maximum length of the description for a node in the graph in characters (and not tokens). - - Used so that we don't hit the input context window of the LLM while generating descriptions. - - - The configuration for text generation during graph enrichment. 
- - - - -## Enrich a graph - -```python -client.enrich_graph( - collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', - run_type="run", - kg_enrichment_settings={ - "community_reports_prompt": "graphrag_community_reports", - "max_summary_input_length": 65536, - "generation_config": { - "model": "openai/gpt-4o-mini", - "temperature": 0.12, - # other generation config params - }, - "leiden_params": { - # leiden algorithm params, all are optional, default values are shown - "max_cluster_size": 1000, - "starting_communities": None, - "extra_forced_iterations": 0, - "resolution": 1.0, - "randomness": 0.001, - "use_modularity": True, - "random_seed": 7272, # If not set, defaults to 7272 - "weight_attribute": "weight", - "is_weighted": None, - "weight_default": 1.0, - "check_directed": True, - } - } -) -``` - - - - - The response from the R2R system after enriching the graph. - ```bash - {'message': 'Graph enrichment task queued successfully.', 'task_id': '6e27dfca-606d-422d-b73f-2d9e138661b4'} - ``` - - - - - - - The ID of the collection to enrich the graph for. If not provided, the graph will be enriched for the default collection. - - - - The type of run to perform. Options are "estimate" or "run". Estimate will return an estimate of the enrichment cost, and run will create the enriched graph. - - - - The settings for the graph enrichment process. - - - The prompt to use for community reports. - - - The maximum length of the summary input in characters (and not tokens). - - Used so that we don't hit the input context window of the LLM while generating community summaries. - - - The configuration for text generation during graph enrichment. - - - The parameters for the Leiden algorithm. - - - Default is ``1000``. Any partition or cluster with - membership >= ``max_cluster_size`` will be isolated into a subnetwork. This - subnetwork will be used for a new leiden global partition mapping, which will - then be remapped back into the global space after completion. Once all - clusters with membership >= ``max_cluster_size`` have been completed, the level - increases and the partition scheme is scanned again for any new clusters with - membership >= ``max_cluster_size`` and the process continues until every - cluster's membership is < ``max_cluster_size`` or if they cannot be broken into - more than one new community. - - - - Default is ``None``. An optional community mapping dictionary that contains a node - id mapping to the community it belongs to. Please see the Notes section regarding - node ids used. - - If no community map is provided, the default behavior is to create a node - community identity map, where every node is in their own community. - - - Default is ``0``. Leiden will run until a maximum quality score has been found - for the node clustering and no nodes are moved to a new cluster in another - iteration. As there is an element of randomness to the Leiden algorithm, it is - sometimes useful to set ``extra_forced_iterations`` to a number larger than 0 - where the entire process is forced to attempt further refinement. - - - Default is ``1.0``. Higher resolution values lead to more communities and lower - resolution values leads to fewer communities. Must be greater than 0. - - - - Default is ``0.001``. The larger the randomness value, the more exploration of - the partition space is possible. This is a major difference from the Louvain - algorithm, which is purely greedy in the partition exploration. - - - Default is ``True``. If ``False``, will use a Constant Potts Model (CPM). 
- - - Default is ``7272``. Can provide an optional seed to the PRNG used in Leiden - for deterministic output. - - - Default is ``weight``. Only used when creating a weighed edge list of tuples - when the source graph is a networkx graph. This attribute corresponds to the - edge data dict key. - - - Default is ``None``. Only used when creating a weighted edge list of tuples - when the source graph is an adjacency matrix. The - :func:`graspologic.utils.is_unweighted` function will scan these - matrices and attempt to determine whether it is weighted or not. This flag can - short circuit this test and the values in the adjacency matrix will be treated - as weights. - - - Default is ``1.0``. If the graph is a networkx graph and the graph does not - have a fully weighted sequence of edges, this default will be used. If the - adjacency matrix is found or specified to be unweighted, this weight_default - will be used for every edge. - - - Default is ``True``. If the graph is an adjacency matrix, we will attempt to - ascertain whether it is directed or undirected. As our leiden implementation is - only known to work with an undirected graph, this function will raise an error - if it is found to be a directed graph. If you know it is undirected and wish to - avoid this scan, you can set this value to ``False`` and only the lower triangle - of the adjacency matrix will be used to generate the weighted edge list. - - - - - - -## Get entities - -```python -client.get_entities( - collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', - offset=0, - limit=1000, - entity_ids=None -) -``` - - - - - The response from the R2R system containing the list of entities in the graph. Total entries is the total number of entities in the graph for the collection (not the number of entities returned). - ```bash - { - "results": { - "entities": [ - { - "name": "PLATO", - "id": 2, - "category": null, - "description": "Plato was a prominent philosopher in ancient Greece, known for his foundational role in Western philosophy and for being the teacher of Aristotle at the Academy in Athens. His teachings significantly influenced Aristotle, who later established the Lyceum, reflecting the enduring impact of Plato's philosophical ideas on subsequent educational institutions.", - "description_embedding": null, - "community_numbers": null, - "extraction_ids": [ - "91370d27-31a4-5f6e-8a0c-2c943680fd78", - "695fa5b3-e416-5608-ba52-3b8b0a66bf3a", - "85e39f16-26b8-5fd3-89be-e9592864bb46", - "42c9e012-08e4-54d5-8df9-a8cb9fe277c9" - ], - "collection_id": null, - "document_id": "32b6a70f-a995-5c51-85d2-834f06283a1e", - "attributes": null - }, - ... # more entities - ], - "total_entries": 2 - } - } - ``` - - - - - - The ID of the collection to get the entities from. If not provided, the entities will be retrieved from the default collection. - - - - The offset for pagination. - - - - - The limit for pagination. - - - - The list of entity IDs to filter by. - - -## Get triples - -```python -client.get_triples( - collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', - offset=0, - limit=100, - entity_names=[], - triple_ids=None -) -``` - - - - - The response from the R2R system containing the list of triples in the graph. Total entries is the total number of triples in the graph for the collection (not the number of triples returned). 
- - ```bash - { - 'results': { - 'total_entries': 28, - 'triples': [ - { - 'id': 1, - 'subject': 'ARISTOTLE', - 'predicate': 'Father-Son', - 'object': 'NICOMACHUS', - 'weight': 1.0, - 'description': 'Nicomachus was the father of Aristotle, who died when Aristotle was a child. ', - 'predicate_embedding': None, - 'extraction_ids': [], - 'document_id': None, - 'attributes': {} - }, - ... # more triples - ] - } - } - ``` - - - - - - - The ID of the collection to get the triples from. If not provided, the triples will be retrieved from the default collection. - - - - The offset for pagination. Defaults to 0. - - - - - The limit for pagination. Defaults to 100. - - - - The list of entity names to filter by. Entities are in all caps. eg. ['ARISTOTLE', 'PLATO'] - - - - The list of triple IDs to filter by. - - - -## Get Communities - -```python -client.get_communities( - collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', - offset=0, - limit=100, - levels=[], - community_numbers=[], -) -``` - - - - - The response from the R2R system containing the list of communities in the graph. - - ```bash - { - 'results': - { - 'communities': [ - { - 'community_number': 0, - 'level': 0, - 'collection_id': '122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', - 'name': "Aristotle and Plato's Legacy", - 'summary': "This community encompasses key historical figures and institutions such as Aristotle, Plato, and their respective schools, Plato's Academy and the Lyceum. The interconnected relationships highlight the profound influence these entities have had on Western philosophy, education, and intellectual traditions.", - 'findings': - [ - "Aristotle, a polymath and philosopher, significantly contributed to various fields including natural sciences and ethics, and his teachings have had a lasting impact on medieval scholarship and modern philosophy. His role as a tutor to Alexander the Great further emphasizes his influence on historical leadership and governance. [Data: Entities (11), Relationships (3, 8, 22, +more)]", - "Plato's Academy served as a critical educational institution where Aristotle studied for 20 years, shaping his intellectual development and laying the groundwork for his later contributions to philosophy. The Academy's legacy is marked by its role in fostering critical thought and dialogue in ancient philosophy. [Data: Entities (5), Relationships (2, 17, 26, +more)]", - "The Lyceum, founded by Aristotle, became a vital source of knowledge and a hub for philosophical discourse in ancient Athens, influencing both medieval scholarship and the development of Western educational practices. Its establishment marked a significant evolution in the teaching of philosophy. [Data: Entities (7), Relationships (4, 11, 25, +more)]", - "Stagira, the birthplace of Aristotle, holds historical significance as a cultural landmark that contributed to the intellectual legacy of Aristotle and, by extension, Western philosophy. This connection underscores the importance of geographical context in the development of philosophical thought. [Data: Entities (3), Relationships (6, +more)]", - "The influence of Plato on Aristotle's teachings is evident in the establishment of the Lyceum, which reflects the enduring impact of Plato's philosophical ideas on subsequent educational institutions and the evolution of Western philosophy. 
[Data: Entities (2), Relationships (13, 26, +more)]" - ], - 'rating': 9.5, - 'rating_explanation': 'The impact severity rating is exceptionally high due to the foundational role these philosophers and their teachings have played in shaping Western thought and educational systems.', - 'embedding': None, - 'attributes': None - }, - ... # more communities - ], - 'total_entries': 3 - } - } - ``` - - - - - - The ID of the collection to get the communities from. If not provided, the communities will be retrieved from the default collection. - - - - The offset for pagination. Defaults to 0. - - - - - The limit for pagination. Defaults to 100. - - - - - The list of levels to filter by. As output of hierarchical clustering, each community is assigned a level. - - - - - The list of community numbers to filter by. - - -## Delete Graph - -Delete the graph for a collection using the `delete_graph_for_collection` method. - -```python -client.delete_graph_for_collection( - collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', - cascade=False -) -``` - - - The ID of the collection to delete the graph for. - - - - Whether to cascade the deletion. - - NOTE: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection. - - -## Get Tuned Prompt - -```python -client.get_tuned_prompt( - prompt_name="graphrag_entity_description", - collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', - documents_offset=0, - documents_limit=100, - chunk_offset=0, - chunk_limit=100 -) -``` - - - - - The response containing the tuned prompt for GraphRAG. - ```bash - { - "results": { - "tuned_prompt": "string" - } - } - ``` - - - - - - The name of the prompt to tune. Valid values include "graphrag_entity_description", "graphrag_triples_extraction_few_shot", and "graphrag_community_reports". - - - - The ID of the collection to tune the prompt for. If not provided, the default collection will be used. - - - - The offset for pagination of documents. Defaults to 0. - - - - The limit for pagination of documents. Defaults to 100. Controls how many documents are used for tuning. - - - - The offset for pagination of chunks within each document. Defaults to 0. - - - - The limit for pagination of chunks within each document. Defaults to 100. Controls how many chunks per document are used for tuning. - - -The tuning process provides an LLM with chunks from each document in the collection. The relative sample size can therefore be controlled by adjusting the document and chunk limits. - -## Deduplicate Entities - -```python -client.deduplicate_entities( - collection_id='122fdf6a-e116-546b-a8f6-e4cb2e2c0a09', - entity_deduplication_settings=entity_deduplication_settings -) -``` - - - - - The response from the R2R system after deduplicating the entities. - ```bash - { - "message": "Entity deduplication task queued successfully.", - "task_id": "6e27dfca-606d-422d-b73f-2d9e138661b4" - } - ``` - - - - - - - The ID of the collection to deduplicate entities for. - - - - The settings for the entity deduplication process. - - - The type of deduplication to perform. Valid values are "by_name". More deduplication types will be added in the future. - - - The prompt to use for entity deduplication. - - - The configuration for text generation during entity deduplication. 
- - - The maximum length of the description for a node in the graph in characters (and not tokens). - Used so that we don't hit the input context window of the LLM while generating descriptions. - - - - -## Search and RAG - -Please see the [Search and RAG](/documentation/python-sdk/retrieval) documentation for more information on how to perform search and RAG using Knowledge Graphs. - -## API Reference - -Please see the [API documentation](/api-reference/endpoint/create_graph) for more information on the capabilities of the R2R Graph creation and enrichment API. diff --git a/docs/documentation/python-sdk/ingestion.mdx b/docs/documentation/python-sdk/ingestion.mdx deleted file mode 100644 index 6798fb813..000000000 --- a/docs/documentation/python-sdk/ingestion.mdx +++ /dev/null @@ -1,902 +0,0 @@ ---- -title: 'Ingestion' -description: 'Ingesting files with R2R.' ---- - - -This SDK documentation is periodically updated. For the latest parameter details, please cross-reference with the API Reference documentation. - - -Inside R2R, `ingestion` refers to the complete pipeline for processing input data: -- Parsing files into text -- Chunking text into semantic units -- Generating embeddings -- Storing data for retrieval - -Ingested files are stored with an associated document identifier as well as a user identifier to enable comprehensive management. - -## Document Ingestion and Management - - - -R2R has recently expanded the available options for ingesting files using multimodal foundation models. In addition to using such models by default for images, R2R can now use them on PDFs, [like it is shown here](https://github.com/getomni-ai/zerox), by passing the following in your ingestion configuration: - -```json -"ingestion_config": { - ..., - "parser_overrides": { - "pdf": "zerox" - } -} -``` - -We recommend this method for achieving the highest quality ingestion results. - - - - - -### Ingest Files - - - -Ingest files or directories into your R2R system: - -```python -file_paths = ['path/to/file1.txt', 'path/to/file2.txt'] -metadatas = [{'key1': 'value1'}, {'key2': 'value2'}] - -# Ingestion configuration for `R2R Full` -ingest_response = client.ingest_files( - file_paths=file_paths, - metadatas=metadatas, - # Runtime chunking configuration - ingestion_config={ - "provider": "unstructured_local", # Local processing - "strategy": "auto", # Automatic processing strategy - "chunking_strategy": "by_title", # Split on title boundaries - "new_after_n_chars": 256, # Start new chunk (soft limit) - "max_characters": 512, # Maximum chunk size (hard limit) - "combine_under_n_chars": 64, # Minimum chunk size - "overlap": 100, # Character overlap between chunks - "chunk_enrichment_settings": { # Document enrichment settings - "enable_chunk_enrichment": False, - } - } -) -``` - -An `ingested` file is parsed, chunked, embedded and stored inside your R2R system. The stored information includes a document identifier, a corresponding user identifier, and other metadata. Knowledge graph creation is done separately and at the `collection` level. Refer to the [ingestion configuration](/documentation/configuration/ingestion/parsing_and_chunking) section for comprehensive details on available options. - - - - - The response from the R2R system after ingesting the files. - ```bash - [{'message': 'Ingestion task queued successfully.', 'task_id': '6e27dfca-606d-422d-b73f-2d9e138661b4', 'document_id': 'c3291abf-8a4e-5d9d-80fd-232ef6fd8526'}, ...] - ``` - - - - - - A list of file paths or directory paths to ingest. 
If a directory path is provided, all files within the directory and its subdirectories will be ingested. - - - - An optional list of metadata dictionaries corresponding to each file. If provided, the length should match the number of files being ingested. - - - - An optional list of document IDs to assign to the ingested files. If provided, the length should match the number of files being ingested. - - - - An optional list of version strings for the ingested files. If provided, the length should match the number of files being ingested. - - - - - The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. Learn more about [configuration here](/documentation/configuration/ingestion/parsing_and_chunking). - - - Which R2R ingestion provider to use. Options are "r2r". - - - Only `recursive` is currently supported. - - - The target size for output chunks. - - - The target overlap fraction for output chunks - - - Dictionary of filetypes and selected override parsers. Currently only `{"pdf": "zerox"}` is supported. - - - - - - Which unstructured ingestion provider to use. Options are "unstructured_local", or "unstructured_api". - - - - Dictionary of filetypes and selected override parsers. Currently only `{"pdf": "zerox"}` is supported. - - - - Sets a maximum size on output chunks. - - - - Combine chunks smaller than this number of characters. - - - - Maximum number of characters per chunk. - - - - Whether to include coordinates in the output. - - - - Encoding to use for text files. - - - - Types of image blocks to extract. - - - - Content type for uncompressed gzip files. - - - - Name of the high-resolution model to use. - - - - Whether to include original elements in the output. - - - - Whether to include page breaks in the output. - - - - List of languages to consider for text processing. - - - - Whether to allow sections to span multiple pages. - - - - Start a new chunk after this many characters. - - - - Languages to use for OCR. - - - - Format of the output. - - - - Number of characters to overlap between chunks. - - - - Whether to overlap all chunks. - - - - Whether to infer table structure in PDFs. - - - - Threshold for considering chunks similar. - - - - Types of tables to skip inferring. - - - - Concurrency level for splitting PDFs. - - - - Whether to split PDFs by page. - - - - Page number to start processing from. - - - - Strategy for processing. Options are "auto", "fast", or "hi_res". - - - - Strategy for chunking. Options are "by_title" or "basic". - - - - Whether to generate unique IDs for elements. - - - - Whether to keep XML tags in the output. - - - - - Whether or not ingestion runs with [orchestration](/cookbooks/orchestration), default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result. - - - - - -### Understanding Ingestion Status - -After calling `ingest_files`, the response includes important status information: - -```python -# Successful ingestion -{ - 'message': 'Ingestion task queued successfully.', - 'task_id': '6e27dfca-606d-422d-b73f-2d9e138661b4', - 'document_id': 'c3291abf-8a4e-5d9d-80fd-232ef6fd8526' -} - -# Check document status later -doc_status = client.documents_overview( - document_ids=['c3291abf-8a4e-5d9d-80fd-232ef6fd8526'] -) -# ingestion_status will be one of: 'pending', 'processing', 'success', 'failed' -``` - - - -We have added support for contextual chunk enrichment! You can learn more about it [here](/cookbooks/contextual-enrichment). 
- -Currently, you need to enable it in your ingestion config: - -```toml -[ingestion.chunk_enrichment_settings] - enable_chunk_enrichment = true # disabled by default - strategies = ["semantic", "neighborhood"] - forward_chunks = 3 # Look ahead 3 chunks - backward_chunks = 3 # Look behind 3 chunks - semantic_neighbors = 10 # Find 10 semantically similar chunks - semantic_similarity_threshold = 0.7 # Minimum similarity score - generation_config = { model = "openai/gpt-4o-mini" } -``` - - - -### Ingest Chunks - -The `ingest_chunks` method allows direct ingestion of pre-processed text, bypassing the standard parsing pipeline. This is useful for: -- Custom preprocessing pipelines -- Streaming data ingestion -- Working with non-file data sources - -```python -chunks = [ - { - "text": "Aristotle was a Greek philosopher...", - }, - ..., - { - "text": "He was born in 384 BC in Stagira...", - } -] - -ingest_response = client.ingest_chunks( - chunks=chunks, - metadata={"title": "Aristotle", "source": "wikipedia"} -) -``` - - - - - The response from the R2R system after ingesting the chunks. - ```bash - {'message': 'Ingest chunks task queued successfully.', 'task_id': '8f27dfca-606d-422d-b73f-2d9e138661c3', 'document_id': 'd4391abf-8a4e-5d9d-80fd-232ef6fd8527'} - ``` - - - - - - A list of chunk dictionaries to ingest. Each dictionary should contain at least a "text" key with the chunk text. An optional "metadata" key can contain a dictionary of metadata for the chunk. - - - - An optional document ID to assign to the ingested chunks. If not provided, a new document ID will be generated. - - - - An optional metadata dictionary for the document. - - - - Whether or not ingestion runs with [orchestration](/cookbooks/orchestration), default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result. - - - -### Update Files - -Update existing documents while maintaining version history: - -```python -# Basic update with new metadata -update_response = client.update_files( - file_paths=file_paths, - document_ids=document_ids, - metadatas=[{ - "status": "reviewed" - }] -) - -# Update with custom chunking -update_response = client.update_files( - file_paths=file_paths, - document_ids=document_ids, - ingestion_config={ - "chunking_strategy": "by_title", - "max_characters": 1024 # Larger chunks for this version - } -) -``` - -The ingestion configuration can be customized analogously to the ingest files endpoint above. - - - - The response from the R2R system after updating the files. - ```bash - [{'message': 'Update files task queued successfully.', 'task_id': '6e27dfca-606d-422d-b73f-2d9e138661b4', 'document_id': '9f375ce9-efe9-5b57-8bf2-a63dee5f3621'}, ...] - ``` - - - - - - A list of file paths to update. - - - - A list of document IDs corresponding to the files being updated. When not provided, an attempt is made to generate the correct document id from the given user id and file path. - - - - An optional list of metadata dictionaries for the updated files. - - - - - The ingestion config override parameter enables developers to customize their R2R chunking strategy at runtime. Learn more about [configuration here](/documentation/configuration/ingestion/parsing_and_chunking). - - - Which R2R ingestion provider to use. Options are "r2r". - - - Only `recursive` is currently supported. - - - The target size for output chunks. - - - The target overlap fraction for output chunks - - - Dictionary of filetypes and selected override parsers. 
Currently only `{"pdf": "zerox"}` is supported. - - - - - - Which unstructured ingestion provider to use. Options are "unstructured_local", or "unstructured_api". - - - - Sets a maximum size on output chunks. - - - - Combine chunks smaller than this number of characters. - - - - Maximum number of characters per chunk. - - - - Whether to include coordinates in the output. - - - - Encoding to use for text files. - - - - Types of image blocks to extract. - - - - Content type for uncompressed gzip files. - - - - Name of the high-resolution model to use. - - - - Whether to include original elements in the output. - - - - Whether to include page breaks in the output. - - - - List of languages to consider for text processing. - - - - Whether to allow sections to span multiple pages. - - - - Start a new chunk after this many characters. - - - - Languages to use for OCR. - - - - Format of the output. - - - - Number of characters to overlap between chunks. - - - - Whether to overlap all chunks. - - - - Whether to infer table structure in PDFs. - - - - Threshold for considering chunks similar. - - - - Types of tables to skip inferring. - - - - Concurrency level for splitting PDFs. - - - - Whether to split PDFs by page. - - - - Page number to start processing from. - - - - Strategy for processing. Options are "auto", "fast", or "hi_res". - - - - Strategy for chunking. Options are "by_title" or "basic". - - - - Whether to generate unique IDs for elements. - - - - Whether to keep XML tags in the output. - - - - - Whether or not ingestion runs with orchestration, default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result. - - - - -### Update Chunks - -Update the content of an existing chunk in your R2R system: - -```python -document_id = "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" -extraction_id = "aeba6400-1bd0-5ee9-8925-04732d675434" - -update_response = client.update_chunks( - document_id=document_id, - extraction_id=extraction_id, - text="Updated chunk content...", - metadata={"source": "manual_edit", "edited_at": "2024-10-24"} -) -``` - - - - - The response from the R2R system after updating the chunk. - ```bash - { - 'message': 'Update chunk task queued successfully.', - 'task_id': '7e27dfca-606d-422d-b73f-2d9e138661b4', - 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1' - } - ``` - - - - - - The ID of the document containing the chunk to update. - - - - The ID of the specific chunk to update. - - - - The new text content to replace the existing chunk text. - - - - An optional metadata dictionary for the updated chunk. If provided, this will replace the existing chunk metadata. - - - - Whether or not the update runs with orchestration, default is `True`. When set to `False`, the update process will run synchronous and directly return the result. - - - -### Documents Overview - -Retrieve high-level document information. Results are restricted to the current user's files, unless the request is made by a superuser, in which case results from all users are returned: - -```python -documents_overview = client.documents_overview() -``` - - - - - A list of dictionaries containing document information. 
- ```bash - [ - { - 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', - 'version': 'v0', - 'collection_ids': [], - 'ingestion_status': 'success', - 'restructuring_status': 'pending', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'title': 'aristotle.txt', - 'created_at': '2024-07-21T20:09:14.218741Z', - 'updated_at': '2024-07-21T20:09:14.218741Z', - 'metadata': {'title': 'aristotle.txt', 'version': 'v0', 'x': 'y'} - }, - ... - ] - ``` - - - - - - An optional list of document IDs to filter the overview. - - - An optional value to offset the starting point of fetched results, defaults to `0`. - - - An optional value to limit the fetched results, defaults to `100`. - - - -### Document Chunks - -Fetch and examine chunks for a particular document. Chunks represent the atomic units of text after processing: - -```python -document_id = "9fbe403b-c11c-5aae-8ade-ef22980c3ad1" -chunks = client.document_chunks(document_id) -``` - - - - - A list of dictionaries containing chunk information. - ```bash - [ - { - 'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath...', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', - 'extraction_id': 'aeba6400-1bd0-5ee9-8925-04732d675434', - 'fragment_id': 'f48bcdad-4155-52a4-8c9d-8ba06e996ba3' - 'metadata': {'title': 'aristotle.txt', 'version': 'v0', 'chunk_order': 0, 'document_type': 'txt', 'unstructured_filetype': 'text/plain', 'unstructured_languages': ['eng'], 'unstructured_parent_id': '971399f6ba2ec9768d2b5b92ab9d17d6', 'partitioned_by_unstructured': True} - }, - ... - ] - ``` - - - - - - - The ID of the document to retrieve chunks for. - - - An optional value to offset the starting point of fetched results, defaults to `0`. - - - An optional value to limit the fetched results, defaults to `100`. - - - An optional value to return the vectors associated with each chunk, defaults to `False`. - - - -### Delete Documents - -Delete a document by its ID: - -```python -delete_response = client.delete( - { - "document_id": - {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"} - } -) -``` - - - - - The response from the R2R system after successfully deleting the documents. - ```bash - {'results': {}} - ``` - - - - - - A list of logical filters to perform over input documents fields which identifies the unique set of documents to delete (e.g., `{"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}}`). Logical operations might include variables such as `"user_id"` or `"title"` and filters like `neq`, `gte`, etc. - - - -## Vector Index Management - -### Create Vector Index - - -Vector indices significantly improve search performance for large collections but add overhead for smaller datasets. Only create indices when working with hundreds of thousands of documents or when search latency is critical. - - -Create a vector index for similarity search: - - -```python -create_response = client.create_vector_index( - table_name="vectors", - index_method="hnsw", - index_measure="cosine_distance", - index_arguments={"m": 16, "ef_construction": 64}, - concurrently=True -) -``` - - - - - The response from the R2R system after creating the vector index. - ```bash - { - 'message': 'Vector index creation task queued successfully.', - 'task_id': '7d38dfca-606d-422d-b73f-2d9e138661b5' - } - ``` - - - - - - The table to create the index on. Options: vectors, entities_document, entities_collection, communities. 
Default: vectors - - - - The indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw - - - - Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. Default: cosine_distance - - - - Configuration parameters for the chosen index method. - - -
    For HNSW indexes:
      • m (int): Number of connections per element
      • ef_construction (int): Size of the dynamic candidate list for construction

    For IVFFlat indexes:
      • n_lists (int): Number of clusters/inverted lists
- - - Custom name for the index. If not provided, one will be auto-generated - - - - Whether to create the index concurrently. Default: True - - -#### Important Considerations - -Vector index creation requires careful planning and consideration of your data and performance requirements. Keep in mind: - -**Resource Intensive Process** -- Index creation can be CPU and memory intensive, especially for large datasets -- For HNSW indexes, memory usage scales with both dataset size and `m` parameter -- Consider creating indexes during off-peak hours for production systems - -**Performance Tuning** -1. **HNSW Parameters:** - - `m`: Higher values (16-64) improve search quality but increase memory usage and build time - - `ef_construction`: Higher values increase build time and quality but have diminishing returns past 100 - - Recommended starting point: `m=16`, `ef_construction=64` - -```python -# Example balanced configuration -client.create_vector_index( - table_name="vectors", - index_method="hnsw", - index_measure="cosine_distance", - index_arguments={ - "m": 16, # Moderate connectivity - "ef_construction": 64 # Balanced build time/quality - }, - concurrently=True -) -``` -**Pre-warming Required** -- **Important:** Newly created indexes require pre-warming to achieve optimal performance -- Initial queries may be slower until the index is loaded into memory -- The first several queries will automatically warm the index -- For production systems, consider implementing explicit pre-warming by running representative queries after index creation -- Without pre-warming, you may not see the expected performance improvements - -**Best Practices** -1. Always use `concurrently=True` in production to avoid blocking other operations -2. Monitor system resources during index creation -3. Test index performance with representative queries before deploying -4. Consider creating indexes on smaller test datasets first to validate parameters - -**Distance Measures** -Choose the appropriate measure based on your use case: -- `cosine_distance`: Best for normalized vectors (most common) -- `l2_distance`: Better for absolute distances -- `max_inner_product`: Optimized for dot product similarity - -### List Vector Indices - -List existing vector indices for a table: - -```python -indices = client.list_vector_indices(table_name="vectors") -``` - - - - - The response containing the list of indices. - ```bash - { - 'indices': [ - { - 'name': 'ix_vector_cosine_ops_hnsw__20241021211541', - 'table': 'vectors', - 'method': 'hnsw', - 'measure': 'cosine_distance' - }, - ... - ] - } - ``` - - - - - - The table to list indices from. Options: vectors, entities_document, entities_collection, communities. Default: vectors - - -### Delete Vector Index - -Delete a vector index from a table: - -```python -delete_response = client.delete_vector_index( - index_name="ix_vector_cosine_ops_hnsw__20241021211541", - table_name="vectors", - concurrently=True -) -``` - - - - - The response from the R2R system after deleting the vector index. - ```bash - { - 'message': 'Vector index deletion task queued successfully.', - 'task_id': '8e49efca-606d-422d-b73f-2d9e138661b6' - } - ``` - - - - - - Name of the index to delete - - - - The table containing the index. Options: vectors, entities_document, entities_collection, communities. Default: vectors - - - - Whether to delete the index concurrently. 
Default: True - - - -### Troubleshooting Common Issues - -#### Ingestion Failures -- Check file permissions and paths -- Verify file formats are supported -- Ensure metadata length matches file_paths length -- Monitor memory usage for large files - -#### Chunking Issues -- Large chunks may impact retrieval quality -- Small chunks may lose context -- Adjust overlap for better context preservation - -#### Vector Index Performance -- Monitor index creation time -- Check memory usage during creation -- Verify warm-up queries are representative -- Consider index rebuild if quality degrades diff --git a/docs/documentation/python-sdk/introduction.mdx b/docs/documentation/python-sdk/introduction.mdx deleted file mode 100644 index 07d7ea8ea..000000000 --- a/docs/documentation/python-sdk/introduction.mdx +++ /dev/null @@ -1,55 +0,0 @@ ---- -title: 'Introduction' -description: 'Build, scale, and manage user-facing Retrieval-Augmented Generation applications.' ---- - -# R2R Python SDK Documentation - -## Installation - -Before starting, make sure you have completed the [R2R installation](/documentation/installation). - -Install the R2R Python SDK: - -```bash -pip install r2r -``` - -## Getting Started - -1. Import the R2R client: - -```python -from r2r import R2RClient -``` - -2. Initialize the client: - -```python -client = R2RClient("http://localhost:7272") -``` - - -3. Check if R2R is running correctly: - -```python -health_response = client.health() -# {"status":"ok"} -``` - -4. Login (Optional): -```python -client.register("me@email.com", "my_password") -# client.verify_email("me@email.com", "my_verification_code") -client.login("me@email.com", "my_password") -``` -When using authentication the commands below automatically restrict the scope to a user's available documents. - -## Additional Documentation - -For more detailed information on specific functionalities of R2R, please refer to the following documentation: - -- [Document Ingestion](/documentation/python-sdk/ingestion): Learn how to add, retrieve, and manage documents in R2R. -- [Search & RAG](/documentation/python-sdk/retrieval): Explore various querying techniques and Retrieval-Augmented Generation capabilities. -- [Authentication](/documentation/python-sdk/auth): Understand how to manage users and implement authentication in R2R. -- [Observability](/documentation/python-sdk/observability): Learn about analytics and monitoring tools for your R2R system. diff --git a/docs/documentation/python-sdk/observability.mdx b/docs/documentation/python-sdk/observability.mdx deleted file mode 100644 index 966c60683..000000000 --- a/docs/documentation/python-sdk/observability.mdx +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: 'Observability' -description: 'Manage and observe your R2R system.' ---- - -## Analytics and Observability -R2R provides various tools for analytics and observability to help you monitor and improve the performance of your RAG system. - - - -The unique identifier of the completion message to be scored. Found in logs for a message. - - -The score to assign to the completion, ranging from -1 to 1. - diff --git a/docs/documentation/python-sdk/retrieval.mdx b/docs/documentation/python-sdk/retrieval.mdx deleted file mode 100644 index 3bb2a6e58..000000000 --- a/docs/documentation/python-sdk/retrieval.mdx +++ /dev/null @@ -1,861 +0,0 @@ ---- -title: 'Search & RAG' -description: 'Search and Retrieval-Augmented Generation with R2R.' 
---- - - -Occasionally this SDK documentation falls out of date, cross-check with the automatcially generated API Reference documentation for the latest parameters. - - - -## AI Powered Search - -### Search - -Perform a basic vector search: - -```python -search_response = client.search("What was Uber's profit in 2020?") -``` - - - - - - The search results from the R2R system. - ```python - { - 'results': { - 'vector_search_results': [ - { - 'fragment_id': '13a12fc0-cbce-5e35-b179-d413c15179cb', - 'extraction_id': '2b8ff2e9-c135-573d-bf8a-7a2db60a0a11', - 'document_id': '3e157b3a-8469-51db-90d9-52e7d896b49b', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'collection_ids': [], - 'score': 0.7449709925072809, - 'text': 'Net\n loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement ...', - 'metadata': {'title': 'uber_2021.pdf', 'version': 'v0', 'chunk_order': 5, 'associatedQuery': "What was Uber's profit in 2020?"} - }, ... - ], - 'kg_search_results': None - } - } - ``` - - - - - - - The search query. - - - - Optional settings for vector search, either a dictionary, a `VectorSearchSettings` object, or `None` may be passed. If a dictionary or `None` is passed, then R2R will use server-side defaults for non-specified fields. - - - - Whether to use vector search. - - - - Whether to perform a hybrid search (combining vector and keyword search). - - - - - Alias for `search_filters`, now `deprecated`. - - - - Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. - - Commonly seen filters include operations include the following: - - `{"document_id": {"$eq": "9fbe403b-..."}}` - - `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` - - `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` - - `{"$and": {"$document_id": ..., "collection_ids": ...}}` - - - - Maximum number of results to return (1-1000). - - - - Group IDs to search for. - - - - The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product). - - - - Whether to include search score values in the search results. - - - - Whether to include element metadata in the search results. - - - - Number of ivfflat index lists to query. Higher increases accuracy but decreases speed. - - - - Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed. - - - - Settings for hybrid search. - - - Weight to apply to full text search. - - - - Weight to apply to semantic search. - - - - Maximum number of results to return from full text search. - - - - K-value for RRF (Rank Reciprocal Fusion). - - - - - - - Optional settings for knowledge graph search, either a dictionary, a `KGSearchSettings` object, or `None` may be passed. If a dictionary or `None` is passed, then R2R will use server-side defaults for non-specified fields. - - - - Whether to use knowledge graph search. - - - - Type of knowledge graph search. Can be 'global' or 'local'. - - - - Level of knowledge graph search. - - - - Configuration for knowledge graph search generation. - - - - List of entity types to use for knowledge graph search. - - - - List of relationships to use for knowledge graph search. - - - - Maximum length of community descriptions. - - - - Maximum number of LLM queries for global search. - - - - Limits for local search on different types of elements. - - - - - - -### Search custom settings - -Learn more about the search [API here](/api-reference/endpoint/search). 
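-
-The `filters` field accepts any of the operators listed above, not only `$eq`. A brief sketch using `$in` (the truncated document IDs are placeholders copied from the parameter description, not real values):
-
-```python
-# placeholder document IDs, shown only to illustrate the `$in` operator
-filtered_by_ids = client.search(
-    "What was Uber's profit in 2020?",
-    vector_search_settings={
-        "filters": {"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}},
-        "search_limit": 50,
-    },
-)
-```
-
-The same `filters` syntax is accepted by the search endpoint directly.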
It allows searching with custom settings, such as bespoke document filters and larger search limits: -```python -# returns only chunks from documents with title `document_title` -filtered_search_response = client.search( - "What was Uber's profit in 2020?", - vector_search_settings={ - # restrict results to the Uber document - "filters": {"title": {"$eq": "uber_2021.pdf"}}, - "search_limit": 100 - } -) -``` - - -### Hybrid Search - - -Learn more about the dedicated knowledge graph capabilities [in R2R here](/cookbooks/hybrid-search). Combine traditional keyword-based search with vector search: - -```python -hybrid_search_response = client.search( - "What was Uber's profit in 2020?", - vector_search_settings={ - "use_hybrid_search": True, - "search_limit": 20, - "hybrid_search_settings": { - "full_text_weight": 1.0, - "semantic_weight": 10.0, - "full_text_limit": 200, - "rrf_k": 25, - }, - } -) -``` - -### Knowledge Graph Search - -Learn more about the dedicated knowledge graph capabilities [in R2R here](/cookbooks/graphrag). You can utilize knowledge graph capabilities to enhance search results, as shown below: - -```python -kg_search_response = client.search( - "What is airbnb", - vector_search_settings={"use_vector_search": False}, - kg_search_settings={ - "use_kg_search": True, - "kg_search_type": "local", - "kg_search_level": "0", - "generation_config": { - "model": "openai/gpt-4o-mini", - "temperature": 0.7, - }, - "local_search_limits": { - "__Entity__": 20, - "__Relationship__": 20, - "__Community__": 20, - }, - "max_community_description_length": 65536, - "max_llm_queries_for_global_search": 250 - } -) -``` - - - - - The knowledge graph search results from the R2R system. - ```bash - { - "kg_search_results": - [ - { - "global_result": None, - "local_result": { - "communities": { - "0": { - "summary": { - "findings": [ - { - "explanation": "Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. His works compiled into a set of six books ...", - "summary": "Aristotle's Foundational Role in Logic" - } - ], - "rating": 9.5, - "rating_explanation": "The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.", - "summary": "The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.", - "title": "Aristotle and His Contributions" - } - } - }, - "entities": { - "0": { - "description": "Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. 
His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...", - "name": "Aristotle" - } - }, - "query": "Who is Aristotle?", - "relationships": {} - } - } - ], - "vector_search_results": None - } - ``` - - - - -## Retrieval-Augmented Generation (RAG) - -### Basic RAG - -Generate a response using RAG: - -```python -rag_response = client.rag("What was Uber's profit in 2020?") -``` - - - - - The RAG response from the R2R system. - ```bash - { - 'results': { - 'completion': { - 'id': 'chatcmpl-9ySStnC0oEhnGPPV1k8ZYnxBKOuW8', - 'choices': [{ - 'finish_reason': 'stop', - 'index': 0, - 'logprobs': None, - 'message': { - 'content': "Uber's profit in 2020 was a net loss of $6.77 billion." - }, - ... - }] - }, - 'search_results': { - 'vector_search_results': [...], - 'kg_search_results': None - } - } - } - ``` - - - - - - The query for RAG. - - - - Optional settings for vector search, either a dictionary, a `VectorSearchSettings` object, or `None` may be passed. If a dictionary is used, non-specified fields will use the server-side default. - - - - Whether to use vector search. - - - - Whether to perform a hybrid search (combining vector and keyword search). - - - - - Alias for `search_filters`, now `deprecated`. - - - - Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. - - Commonly seen filters include operations include the following: - - `{"document_id": {"$eq": "9fbe403b-..."}}` - - `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` - - `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` - - `{"$and": {"$document_id": ..., "collection_ids": ...}}` - - - - Maximum number of results to return (1-1000). - - - - Collection Ids to search for. - - - - The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product). - - - - Whether to include search score values in the search results. - - - - Whether to include element metadata in the search results. - - - - Number of ivfflat index lists to query. Higher increases accuracy but decreases speed. - - - - Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed. - - - - Settings for hybrid search. - - - Weight to apply to full text search. - - - - Weight to apply to semantic search. - - - - Maximum number of results to return from full text search. - - - - K-value for RRF (Rank Reciprocal Fusion). - - - - - - - - Optional settings for knowledge graph search, either a dictionary, a `KGSearchSettings` object, or `None` may be passed. If a dictionary or `None` is passed, then R2R will use server-side defaults for non-specified fields. - - - - Whether to use knowledge graph search. - - - - Type of knowledge graph search. Can be 'global' or 'local'. - - - - Level of knowledge graph search. - - - - Configuration for knowledge graph search generation. - - - - List of entity types to use for knowledge graph search. - - - - List of relationships to use for knowledge graph search. - - - - Maximum length of community descriptions. - - - - Maximum number of LLM queries for global search. - - - - Limits for local search on different types of elements. - - - - - - Optional configuration for LLM to use during RAG generation, including model selection and parameters. Will default to values specified in `r2r.toml`. - - - Model used in final LLM completion. - - - - Temperature used in final LLM completion. 
- - - - The `top_p` used in final LLM completion. - - - - The `max_tokens_to_sample` used in final LLM completion. - - - - The `functions` used in final LLM completion. - - - - The `tools` used in final LLM completion. - - - - The `api_base` used in final LLM completion. - - - - - - - - Optional custom prompt to override the default task prompt. - - - - Augment document chunks with their respective document titles? - - - -### RAG with custom search settings - -Learn more about the RAG [API here](/api-reference/endpoint/rag). It allows performing RAG with custom settings, such as hybrid search: - -```python -hybrid_rag_response = client.rag( - "Who is Jon Snow?", - vector_search_settings={"use_hybrid_search": True} -) -``` - -### RAG with custom completion LLM - -R2R supports configuration on server-side and at runtime, which you can [read about here](/documentation/configuration/rag). An example below, using Anthropic at runtime: - -```python -anthropic_rag_response = client.rag( - "What is R2R?", - rag_generation_config={"model":"anthropic/claude-3-opus-20240229"} -) -``` - - -### Streaming RAG - -R2R supports streaming RAG responses for real-time applications: - -```python -stream_response = client.rag( - "Who was Aristotle?", - rag_generation_config={"stream": True} -) -for chunk in stream_response: - print(chunk, end='', flush=True) -``` - - - - ```bash - ["{\"id\":\"808c47c5-ebef-504a-a230-aa9ddcfbd87 .... - Lyft reported a net loss of $1,752,857,000 in 2020 according to [2]. Therefore, Lyft did not make a profit in 2020. - ``` - - - - - -### Advanced RAG Techniques - -R2R supports advanced Retrieval-Augmented Generation (RAG) techniques that can be easily configured at runtime. These techniques include Hypothetical Document Embeddings (HyDE) and RAG-Fusion, which can significantly enhance the quality and relevance of retrieved information. - -To use an advanced RAG technique, you can specify the `search_strategy` parameter in your vector search settings: - -```python -from r2r import R2RClient - -client = R2RClient() - -# Using HyDE -hyde_response = client.rag( - "What are the main themes in Shakespeare's plays?", - vector_search_settings={ - "search_strategy": "hyde", - "search_limit": 10 - } -) - -# Using RAG-Fusion -rag_fusion_response = client.rag( - "Explain the theory of relativity", - vector_search_settings={ - "search_strategy": "rag_fusion", - "search_limit": 20 - } -) -``` - - -For a comprehensive guide on implementing and optimizing advanced RAG techniques in R2R, including HyDE and RAG-Fusion, please refer to our [Advanced RAG Cookbook](/cookbooks/advanced-rag). - - -### Customizing RAG - -Putting everything together for highly customized RAG functionality at runtime: - -```python - -custom_rag_response = client.rag( - "Who was Aristotle?", - vector_search_settings={ - "use_hybrid_search": True, - "search_limit": 20, - "hybrid_search_settings": { - "full_text_weight": 1.0, - "semantic_weight": 10.0, - "full_text_limit": 200, - "rrf_k": 25, - }, - }, - kg_search_settings={ - "use_kg_search": True, - "kg_search_type": "local", - }, - rag_generation_config={ - "model": "anthropic/claude-3-haiku-20240307", - "temperature": 0.7, - "stream": True - }, - task_prompt_override="Only answer the question if the context is SUPER relevant!!\n\nQuery:\n{query}\n\nContext:\n{context}" -) -``` - -## Agents - -### Multi-turn agentic RAG -The R2R application includes agents which come equipped with a search tool, enabling them to perform RAG. 
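-In its simplest form the agent needs only the running conversation; the search and generation settings shown earlier in this document remain optional keyword arguments. A minimal single-turn sketch (the question is arbitrary):
-
-```python
-# single user message; settings objects may be added exactly as with client.rag
-single_turn_response = client.agent(
-    messages=[{"role": "user", "content": "Who was Aristotle?"}]
-)
-```
-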
Using the R2R Agent for multi-turn conversations: - -```python -messages = [ - {"role": "user", "content": "What was Aristotle's main contribution to philosophy?"}, - {"role": "assistant", "content": "Aristotle made numerous significant contributions to philosophy, but one of his main contributions was in the field of logic and reasoning. He developed a system of formal logic, which is considered the first comprehensive system of its kind in Western philosophy. This system, often referred to as Aristotelian logic or term logic, provided a framework for deductive reasoning and laid the groundwork for scientific thinking."}, - {"role": "user", "content": "Can you elaborate on how this influenced later thinkers?"} -] - -rag_agent_response = client.agent( - messages, - vector_search_settings={"use_hybrid_search":True}, -) -``` - -Note that any of the customization seen in AI powered search and RAG documentation above can be applied here. - - - - - The agent endpoint will return the entire conversation as a response, including internal tool calls. - ```bash - { 'results': - [ - {'role': 'system', 'content': '## You are a helpful agent that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.\n', 'name': None, 'function_call': None, 'tool_calls': None}, - {'role': 'user', 'content': "What was Aristotle's main contribution to philosophy?", 'name': None, 'function_call': None, 'tool_calls': None}, - {'role': 'assistant', 'content': 'Aristotle made numerous significant contributions to philosophy, but one of his main contributions was in the field of logic and reasoning. He developed a system of formal logic, which is considered the first comprehensive system of its kind in Western philosophy. This system, often referred to as Aristotelian logic or term logic, provided a framework for deductive reasoning and laid the groundwork for scientific thinking.', 'name': None, 'function_call': None, 'tool_calls': None}, - {'role': 'user', 'content': 'Can you elaborate on how this influenced later thinkers?', 'name': None, 'function_call': None, 'tool_calls': None}, - {'role': 'assistant', 'content': None, 'name': None, 'function_call': {'name': 'search', 'arguments': '{"query":"Aristotle\'s influence on later thinkers in philosophy"}'}, 'tool_calls': None}, - {'role': 'function', 'content': '1. ormation: List of writers influenced by Aristotle More than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. According to the philosopher Bryan Magee, "it is doubtful whether any human being has ever known as much as he did".[145]\n2. subject of contemporary philosophical discussion. Aristotle\'s views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. 
He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition\n3. the scholastic tradition of the Catholic Church. Aristotle was revered among medieval Muslim scholars as "The First Teacher", and among medieval Christians like Thomas Aquinas as simply "The Philosopher", while the poet Dante called him "the master of those who know". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle\'s influence on logic continued well into the 19th century. In addition, his ethics, although\n4. hilosophy\nFurther information: Peripatetic school The immediate influence of Aristotle\'s work was felt as the Lyceum grew into the Peripatetic school. Aristotle\'s students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle\'s influence over Alexander the Great is seen in the latter\'s bringing with him on his expedition a host of zoologists, botanists, and researchers. He had also learned a great deal\n5. scholastic philosophers. Alkindus greatly admired Aristotle\'s philosophy,[168] and Averroes spoke of Aristotle as the "exemplar" for all future philosophers.[169] Medieval Muslim scholars regularly described Aristotle as the "First Teacher".[167] The title was later used by Western philosophers (as in the famous poem of Dante) who were influenced by the tradition of Islamic philosophy.[170]\n\nMedieval Europe\nFurther information: Aristotelianism and Syllogism § Medieval\n6. those by James of Venice and William of Moerbeke. After the Scholastic Thomas Aquinas wrote his Summa Theologica, working from Moerbeke\'s translations and calling Aristotle "The Philosopher",[172] the demand for Aristotle\'s writings grew, and the Greek manuscripts returned to the West, stimulating a revival of Aristotelianism in Europe that continued into the Renaissance.[173] These thinkers blended Aristotelian philosophy with Christianity, bringing the thought of Ancient Greece into the Middle Ages.\n7. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n8. , individualism, teleology, and meteorology.[151] The scholar Taneli Kukkonen notes that "in the best 20th-century scholarship Aristotle comes alive as a thinker wrestling with the full weight of the Greek philosophical tradition."[148] What follows is an overview of the transmission and influence of his texts and ideas into the modern era.\n\nHis successor, Theophrastus\nMain articles: Theophrastus and Historia Plantarum (Theophrastus)\n9. 
man being has ever known as much as he did".[145] Among countless other achievements, Aristotle was the founder of formal logic,[146] pioneered the study of zoology, and left every future scientist and philosopher in his debt through his contributions to the scientific method.[2][147][148] Taneli Kukkonen, observes that his achievement in founding two sciences is unmatched, and his reach in influencing "every branch of intellectual enterprise" including Western ethical and political theory, theology,\n10. ight) in the Kitāb naʿt al-ḥayawān, c.\u20091220.[166] Aristotle was one of the most revered Western thinkers in early Islamic theology. Most of the still extant works of Aristotle,[167] as well as a number of the original Greek commentaries, were translated into Arabic and studied by Muslim philosophers, scientists and scholars. Averroes, Avicenna and Alpharabius, who wrote on Aristotle in great depth, also influenced Thomas Aquinas and other Western Christian scholastic philosophers. Alkindus greatly admired\n', 'name': 'search', 'function_call': None, 'tool_calls': None}, - {'role': 'assistant', 'content': 'Aristotle\'s contributions to philosophy, particularly his development of formal logic, had a profound influence on later thinkers across various cultures and eras. Here are some key ways in which his work influenced subsequent intellectual traditions:\n\n1. **Medieval Islamic Philosophy**: Aristotle was highly revered among medieval Muslim scholars, who referred to him as "The First Teacher." His works were extensively studied and commented upon by philosophers such as Averroes and Avicenna. These scholars played a crucial role in preserving and expanding upon Aristotle\'s ideas, which later influenced Western thought [5][10].\n\n2. **Christian Scholasticism**: In medieval Europe, Aristotle\'s works were integrated into Christian theology, particularly through the efforts of Thomas Aquinas, who referred to Aristotle as "The Philosopher." Aquinas\' synthesis of Aristotelian philosophy with Christian doctrine became a cornerstone of Scholasticism, a dominant intellectual tradition in medieval Europe [3][6].\n\n3. **Renaissance and Enlightenment**: Aristotle\'s influence persisted into the Renaissance, where his works were revived and studied extensively. This period saw a renewed interest in classical texts, and Aristotle\'s ideas continued to shape scientific and philosophical inquiry until the Enlightenment, when new scientific paradigms began to emerge [2][6].\n\n4. **Development of Logic**: Aristotle\'s system of formal logic remained the standard for centuries and was studied by medieval scholars such as Peter Abelard and Jean Buridan. His influence on logic extended well into the 19th century, shaping the development of this field [3].\n\n5. **Peripatetic School**: Aristotle\'s immediate influence was also felt through the Peripatetic school, which he founded. His students, including Theophrastus, carried on his work and further developed his ideas, ensuring that his intellectual legacy continued [4][8].\n\nOverall, Aristotle\'s contributions laid the groundwork for many fields of study and influenced a wide range of thinkers, making him one of the most significant figures in the history of Western philosophy.', 'name': None, 'function_call': None, 'tool_calls': None} - ] - } - ``` - - - - - - - The list of messages to pass the RAG agent. - - - Optional settings for vector search, either a dictionary, a `VectorSearchSettings` object, or `None` may be passed. 
If a dictionary is used, non-specified fields will use the server-side default. - - - - Whether to use vector search. - - - - Whether to perform a hybrid search (combining vector and keyword search). - - - - Alias for `search_filters`, now `deprecated`. - - - - Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. - - Commonly seen filters include operations include the following: - - `{"document_id": {"$eq": "9fbe403b-..."}}` - - `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` - - `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` - - `{"$and": {"$document_id": ..., "collection_ids": ...}}` - - - - Maximum number of results to return (1-1000). - - - - Collection Ids to search for. - - - - The distance measure to use for indexing (cosine_distance, l2_distance, or max_inner_product). - - - - Whether to include search score values in the search results. - - - - Whether to include element metadata in the search results. - - - - Number of ivfflat index lists to query. Higher increases accuracy but decreases speed. - - - - Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed. - - - - Settings for hybrid search. - - - Weight to apply to full text search. - - - - Weight to apply to semantic search. - - - - Maximum number of results to return from full text search. - - - - K-value for RRF (Rank Reciprocal Fusion). - - - - - - - - Optional settings for knowledge graph search, either a dictionary, a `KGSearchSettings` object, or `None` may be passed. If a dictionary or `None` is passed, then R2R will use server-side defaults for non-specified fields. - - - - Whether to use knowledge graph search. - - - - Type of knowledge graph search. Can be 'global' or 'local'. - - - - Level of knowledge graph search. - - - - Configuration for knowledge graph search generation. - - - - List of entity types to use for knowledge graph search. - - - - List of relationships to use for knowledge graph search. - - - - Maximum length of community descriptions. - - - - Maximum number of LLM queries for global search. - - - - Limits for local search on different types of elements. - - - - - - - Optional configuration for LLM to use during RAG generation, including model selection and parameters. Will default to values specified in `r2r.toml`. - - - Model used in final LLM completion. - - - - Temperature used in final LLM completion. - - - - The `top_p` used in final LLM completion. - - - - The `max_tokens_to_sample` used in final LLM completion. - - - - The `functions` used in final LLM completion. - - - - The `tools` used in final LLM completion. - - - - The `api_base` used in final LLM completion. - - - - - - - Optional custom prompt to override the default task prompt. - - - - - -### Multi-turn agentic RAG with streaming -The response from the RAG agent may be streamed directly back - -```python -messages = [ - {"role": "user", "content": "What was Aristotle's main contribution to philosophy?"}, - {"role": "assistant", "content": "Aristotle made numerous significant contributions to philosophy, but one of his main contributions was in the field of logic and reasoning. He developed a system of formal logic, which is considered the first comprehensive system of its kind in Western philosophy. 
This system, often referred to as Aristotelian logic or term logic, provided a framework for deductive reasoning and laid the groundwork for scientific thinking."}, - {"role": "user", "content": "Can you elaborate on how this influenced later thinkers?"} -] - -rag_agent_response = client.agent( - messages, - vector_search_settings={"use_hybrid_search":True}, - rag_generation_config={"stream":True} -) -``` - - - - - - The agent endpoint will stream back its response, including internal tool calls. - ```bash - search{"query":"Aristotle's influence on later thinkers in philosophy"}"{\"id\":\"b234931e-0cfb-5644-8f23-560a3097f5fe\",\"score\":1.0,\"metadata\":{\"text\":\"ormation: List of writers influenced by Aristotle More than 2300 years after his death, Aristotle remains one of the most influential people who ever lived.[142][143][144] He contributed to almost every field of human knowledge then in existence, and he was the founder of many new fields. According to the philosopher Bryan Magee, \\\"it is doubtful whether any human being has ever known as much as he did\\\".[145]\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"1827ac2c-2a06-5bc2-ad29-aa14b4d99540\",\"score\":1.0,\"metadata\":{\"text\":\"subject of contemporary philosophical discussion. Aristotle's views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"94718936-ea92-5e29-a5ee-d4a6bc037384\",\"score\":1.0,\"metadata\":{\"text\":\"the scholastic tradition of the Catholic Church. Aristotle was revered among medieval Muslim scholars as \\\"The First Teacher\\\", and among medieval Christians like Thomas Aquinas as simply \\\"The Philosopher\\\", while the poet Dante called him \\\"the master of those who know\\\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle's influence on logic continued well into the 19th century. In addition, his ethics, although\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"16483f14-f8a2-5c5c-8fcd-1bcbbd6603e4\",\"score\":1.0,\"metadata\":{\"text\":\"hilosophy\\nFurther information: Peripatetic school The immediate influence of Aristotle's work was felt as the Lyceum grew into the Peripatetic school. 
Aristotle's students included Aristoxenus, Dicaearchus, Demetrius of Phalerum, Eudemos of Rhodes, Harpalus, Hephaestion, Mnason of Phocis, Nicomachus, and Theophrastus. Aristotle's influence over Alexander the Great is seen in the latter's bringing with him on his expedition a host of zoologists, botanists, and researchers. He had also learned a great deal\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"26eb20ee-a203-5ad5-beaa-511cc526aa6e\",\"score\":1.0,\"metadata\":{\"text\":\"scholastic philosophers. Alkindus greatly admired Aristotle's philosophy,[168] and Averroes spoke of Aristotle as the \\\"exemplar\\\" for all future philosophers.[169] Medieval Muslim scholars regularly described Aristotle as the \\\"First Teacher\\\".[167] The title was later used by Western philosophers (as in the famous poem of Dante) who were influenced by the tradition of Islamic philosophy.[170]\\n\\nMedieval Europe\\nFurther information: Aristotelianism and Syllogism \u00a7 Medieval\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"a08fd1b4-4e6f-5487-9af6-df2f6cfe1048\",\"score\":1.0,\"metadata\":{\"text\":\"those by James of Venice and William of Moerbeke. After the Scholastic Thomas Aquinas wrote his Summa Theologica, working from Moerbeke's translations and calling Aristotle \\\"The Philosopher\\\",[172] the demand for Aristotle's writings grew, and the Greek manuscripts returned to the West, stimulating a revival of Aristotelianism in Europe that continued into the Renaissance.[173] These thinkers blended Aristotelian philosophy with Christianity, bringing the thought of Ancient Greece into the Middle Ages.\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"9689a804-5a95-5696-97da-a076a3eb8320\",\"score\":1.0,\"metadata\":{\"text\":\"Aristotle[A] (Greek: \u1f08\u03c1\u03b9\u03c3\u03c4\u03bf\u03c4\u03ad\u03bb\u03b7\u03c2 Aristot\u00e9l\u0113s, pronounced [aristot\u00e9l\u025b\u02d0s]; 384\u2013322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. 
As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"dd19a0d6-4cef-590b-9721-35c26b1ee056\",\"score\":1.0,\"metadata\":{\"text\":\", individualism, teleology, and meteorology.[151] The scholar Taneli Kukkonen notes that \\\"in the best 20th-century scholarship Aristotle comes alive as a thinker wrestling with the full weight of the Greek philosophical tradition.\\\"[148] What follows is an overview of the transmission and influence of his texts and ideas into the modern era.\\n\\nHis successor, Theophrastus\\nMain articles: Theophrastus and Historia Plantarum (Theophrastus)\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"8d125c7a-0084-5adf-b094-c96c91611897\",\"score\":1.0,\"metadata\":{\"text\":\"man being has ever known as much as he did\\\".[145] Among countless other achievements, Aristotle was the founder of formal logic,[146] pioneered the study of zoology, and left every future scientist and philosopher in his debt through his contributions to the scientific method.[2][147][148] Taneli Kukkonen, observes that his achievement in founding two sciences is unmatched, and his reach in influencing \\\"every branch of intellectual enterprise\\\" including Western ethical and political theory, theology,\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}","{\"id\":\"40d671b0-a412-5822-b088-461baf2324e6\",\"score\":1.0,\"metadata\":{\"text\":\"ight) in the Kit\u0101b na\u02bft al-\u1e25ayaw\u0101n, c.\u20091220.[166] Aristotle was one of the most revered Western thinkers in early Islamic theology. Most of the still extant works of Aristotle,[167] as well as a number of the original Greek commentaries, were translated into Arabic and studied by Muslim philosophers, scientists and scholars. Averroes, Avicenna and Alpharabius, who wrote on Aristotle in great depth, also influenced Thomas Aquinas and other Western Christian scholastic philosophers. Alkindus greatly admired\",\"title\":\"aristotle.txt\",\"user_id\":\"2acb499e-8428-543b-bd85-0d9098718220\",\"document_id\":\"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\",\"extraction_id\":\"69431c4a-30cf-504f-8fab-7dcfc7580c63\",\"associatedQuery\":\"Aristotle's influence on later thinkers in philosophy\"}}"Aristotle's contributions to philosophy, particularly his development of formal logic, had a profound influence on later thinkers across various cultures and eras. Here are some key ways in which his work influenced subsequent intellectual traditions: - - 1. **Medieval Islamic Philosophy**: Aristotle was highly revered among medieval Muslim scholars, who referred to him as "The First Teacher." 
His works were extensively translated into Arabic and studied by philosophers such as Averroes and Avicenna. These scholars not only preserved Aristotle's works but also expanded upon them, influencing both Islamic and Western thought [5][10]. - - 2. **Christian Scholasticism**: In medieval Europe, Aristotle's works were integrated into Christian theology, particularly through the efforts of Thomas Aquinas, who referred to Aristotle as "The Philosopher." Aquinas's synthesis of Aristotelian philosophy with Christian doctrine became a cornerstone of Scholasticism, a dominant intellectual tradition in medieval Europe [3][6]. - - 3. **Renaissance and Enlightenment**: Aristotle's influence persisted into the Renaissance and Enlightenment periods. His works on logic, ethics, and natural sciences were foundational texts for scholars during these eras. The revival of Aristotelianism during the Renaissance helped bridge the gap between ancient Greek philosophy and modern scientific thought [2][6]. - - 4. **Development of Modern Science**: Aristotle's method of systematic observation and classification in natural sciences laid the groundwork for the scientific method. His influence extended well into the 19th century, impacting the development of various scientific disciplines [9]. - - 5. **Peripatetic School**: Aristotle's immediate influence was felt through the Peripatetic school, which he founded. His students, including Theophrastus, continued to develop and disseminate his ideas, ensuring that his philosophical legacy endured [4][8]. - - Overall, Aristotle's contributions to logic, ethics, natural sciences, and metaphysics created a foundation upon which much of Western intellectual tradition was built. His work influenced a wide range of fields and thinkers, making him one of the most pivotal figures in the history of philosophy. - ``` - - - diff --git a/docs/documentation/quickstart.mdx b/docs/documentation/quickstart.mdx deleted file mode 100644 index 2710931a5..000000000 --- a/docs/documentation/quickstart.mdx +++ /dev/null @@ -1,222 +0,0 @@ ---- -title: 'Quickstart' -description: 'Getting started with R2R' -icon: 'bolt' ---- - -This basic quickstart shows how to: - -1. Ingest files into your R2R system -2. Search over ingested files -3. Request or stream a RAG (Retrieval-Augmented Generation) response -4. Use the RAG Agent for more complex, interactive queries - -Be sure to complete the [installation instructions](/documentation/installation) before continuing with this guide. If you prefer to dive straight into the API details, select a choice from below: - - - - - - - - -## Getting started - -Start by checking that you have correctly deployed your R2R instance locally: - -```bash -curl http://localhost:7272/v2/health -# {"results":{"response":"ok"}} -``` - - -SciPhi offers managed enterprise solutions for R2R. If you're interested in a fully managed, scalable deployment of R2R for your organization, please contact their team at founders@sciphi.ai for more information on enterprise offerings. - - - -## Ingesting file(s) and directories - -The remainder of this quickstart will proceed with CLI commands, but all of these commands are easily reproduced inside of the Javascript or Python SDK. - -Ingest your selected files or directories: - -```bash -r2r ingest-files --file-paths /path/to/your_file_1 /path/to/your_dir_1 ... 
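-# files and directories may be mixed; each ingested file becomes its own document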
-``` - -**For testing**: Use the sample file(s) included inside the R2R project: - -```bash -r2r ingest-sample-file -# or r2r ingest-sample-files for multi-ingestion -``` - -Example output: -```plaintext -[{'message': 'Ingestion task queued successfully.', 'task_id': '2b16bb55-4f47-4e66-a6bd-da9e215b9793', 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1'}] -``` - -When no document ID(s) are provided to the ingest_files endpoint, a unique document ID is automatically generated for each ingested document from the input filepath and user id. - -After successful ingestion, the documents overview endpoint will return output like so: -```bash -r2r documents-overview -``` - -Example output: -```plaintext -{ - 'id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', - 'title': 'aristotle.txt', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - ... - 'ingestion_status': 'parsing', - ... -} -... within 10s ... -{ - 'id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', - ... - 'ingestion_status': 'success', - ... -} -``` - -Ingestion is complete when all documents are in a `success` or `failed` state. -## Executing a search - -Perform a search query: - -```bash -r2r search --query="who was aristotle?" -``` - -The search query will use basic similarity search to find the most relevant documents. You can use advanced search methods like [hybrid search](/cookbooks/hybrid-search) or [knowledge graph search](/cookbooks/graphrag) depending on your use case. - -Example output: -```plaintext -{'results': - {'vector_search_results': [ - { - 'fragment_id': '34c32587-e2c9-529f-b0a7-884e9a3c3b2e', - 'extraction_id': '8edf5123-0a5c-568c-bf97-654b6adaf8dc', - 'document_id': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1', - 'user_id': '2acb499e-8428-543b-bd85-0d9098718220', - 'collection_ids': [], - 'score': 0.780314067545999, - 'text': 'Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.', - 'metadata': { - 'title': 'aristotle.txt', - 'version': 'v0', - 'chunk_order': 0, - ... -``` - -## RAG Response - -Generate a RAG response: - -```bash -r2r rag --query="who was aristotle?" --use-hybrid-search -``` - -Example output: -```plaintext -Search Results: -{'vector_search_results': ... } -Completion: -{'results': [ - { - 'id': 'chatcmpl-9eXL6sKWlUkP3f6QBnXvEiKkWKBK4', - 'choices': [ - { - 'finish_reason': 'stop', - 'index': 0, - 'logprobs': None, - 'message': { - 'content': "Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose writings covered a broad range of subjects including the natural sciences, - ... -``` - -## Stream a RAG Response - -Stream a RAG response: - -```bash -r2r rag --query="who was aristotle?" --stream --use-hybrid-search -``` - -Example output (streamed): -```plaintext -"{\"fragment_id\":\"34c32587-e2c9-52.....}" -Aristotle (384–322 BC) was an Ancient Greek philosopher ... -``` - -## Using the RAG Agent - -The RAG Agent provides a more interactive and intelligent way to query your knowledge base. It can formulate its own questions, search for information, and provide informed responses based on the retrieved context. 
- -### Basic RAG Agent Usage - -Here's how to use the RAG Agent for a simple query: - -```python -from r2r import R2RClient - -client = R2RClient("http://localhost:7272") -# when using auth, do client.login(...) - -messages = [ - {"role": "user", "content": "What was Aristotle's main contribution to philosophy?"}, - {"role": "assistant", "content": "Aristotle made numerous significant contributions to philosophy, but one of his main contributions was in the field of logic and reasoning. He developed a system of formal logic, which is considered the first comprehensive system of its kind in Western philosophy. This system, often referred to as Aristotelian logic or term logic, provided a framework for deductive reasoning and laid the groundwork for scientific thinking."}, - {"role": "user", "content": "Can you elaborate on how this influenced later thinkers?"} -] - -result = client.agent( - messages=messages, - vector_search_settings={"use_hybrid_search":True}, - rag_generation_config={"model": "openai/gpt-4o", "temperature": 0.7} -) -print(result) -``` - -## Additional Features - -R2R offers additional features to enhance your document management and user experience: - -### User Authentication - -R2R provides a complete set of user authentication and management features, allowing you to implement secure and feature-rich authentication systems or integrate with your preferred authentication provider. - - - - Learn how to implement user registration, login, email verification, and more using R2R's built-in authentication capabilities. - - - Explore the available authentication provider options in R2R and how to integrate with your preferred provider. - - - -### Collections - -Collections in R2R enable efficient access control and organization of users and documents. With collections, you can manage permissions and access at a group level. - - - - Discover how to create, manage, and utilize collections in R2R for granular access control and document organization. - - - Learn about best practices for implementing collection permissions and customizing access control in your R2R application. - - - -## Next Steps - -Now that you have a basic understanding of R2R's core features, you can explore more advanced topics: - -- Dive deeper into [document ingestion](/documentation/python-sdk/ingestion) and customization options. -- Learn about [search and RAG](/documentation/python-sdk/retrieval) inside R2R. -- Implement [user authentication](/cookbooks/user-auth) to secure your application. -- Organize your documents using [collections](/cookbooks/collections) for granular access control. - -If you have any questions or need further assistance, please refer to the [R2R documentation](/) or reach out to our support team. 
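-
-As noted earlier, every CLI command in this quickstart can be reproduced with the Python SDK. For convenience, the sketch below mirrors the same flow; the file path is a placeholder and the `ingest_files` call follows the Python SDK ingestion docs, so double-check it against your installed version:
-
-```python
-from r2r import R2RClient
-
-client = R2RClient("http://localhost:7272")
-
-# 1. Health check (mirrors `curl http://localhost:7272/v2/health`)
-print(client.health())
-
-# 2. Ingest a local file (mirrors `r2r ingest-files --file-paths ...`)
-ingest_response = client.ingest_files(file_paths=["/path/to/your_file_1"])
-
-# 3. Basic similarity search (mirrors `r2r search --query="who was aristotle?"`)
-search_response = client.search("who was aristotle?")
-
-# 4. RAG with hybrid search (mirrors `r2r rag --query=... --use-hybrid-search`)
-rag_response = client.rag(
-    "who was aristotle?",
-    vector_search_settings={"use_hybrid_search": True},
-)
-```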
diff --git a/docs/favicon.png b/docs/favicon.png deleted file mode 100644 index 30a4cf82e..000000000 Binary files a/docs/favicon.png and /dev/null differ diff --git a/docs/images/airbnb_graph.png b/docs/images/airbnb_graph.png deleted file mode 100644 index a56d4642e..000000000 Binary files a/docs/images/airbnb_graph.png and /dev/null differ diff --git a/docs/images/aristotle.png b/docs/images/aristotle.png deleted file mode 100644 index eae870b73..000000000 Binary files a/docs/images/aristotle.png and /dev/null differ diff --git a/docs/images/azure_deployment.png b/docs/images/azure_deployment.png deleted file mode 100644 index b4fc590ef..000000000 Binary files a/docs/images/azure_deployment.png and /dev/null differ diff --git a/docs/images/azure_security.png b/docs/images/azure_security.png deleted file mode 100644 index 8fc4da98c..000000000 Binary files a/docs/images/azure_security.png and /dev/null differ diff --git a/docs/images/chat-interface.png b/docs/images/chat-interface.png deleted file mode 100644 index b39d18023..000000000 Binary files a/docs/images/chat-interface.png and /dev/null differ diff --git a/docs/images/chat.png b/docs/images/chat.png deleted file mode 100644 index 786fcc898..000000000 Binary files a/docs/images/chat.png and /dev/null differ diff --git a/docs/images/chat_2.png b/docs/images/chat_2.png deleted file mode 100644 index 311a7d980..000000000 Binary files a/docs/images/chat_2.png and /dev/null differ diff --git a/docs/images/checks-passed.png b/docs/images/checks-passed.png deleted file mode 100644 index 3303c7736..000000000 Binary files a/docs/images/checks-passed.png and /dev/null differ diff --git a/docs/images/deploy.png b/docs/images/deploy.png deleted file mode 100644 index d9c8babf9..000000000 Binary files a/docs/images/deploy.png and /dev/null differ diff --git a/docs/images/deployment.png b/docs/images/deployment.png deleted file mode 100644 index 616712ef3..000000000 Binary files a/docs/images/deployment.png and /dev/null differ diff --git a/docs/images/docker_hello_world.png b/docs/images/docker_hello_world.png deleted file mode 100644 index 0165d7d59..000000000 Binary files a/docs/images/docker_hello_world.png and /dev/null differ diff --git a/docs/images/documents.png b/docs/images/documents.png deleted file mode 100644 index e8088d39e..000000000 Binary files a/docs/images/documents.png and /dev/null differ diff --git a/docs/images/enriched.png b/docs/images/enriched.png deleted file mode 100644 index 7b879ca45..000000000 Binary files a/docs/images/enriched.png and /dev/null differ diff --git a/docs/images/external_providers.png b/docs/images/external_providers.png deleted file mode 100644 index c19df0e47..000000000 Binary files a/docs/images/external_providers.png and /dev/null differ diff --git a/docs/images/form.png b/docs/images/form.png deleted file mode 100644 index 07734fbd5..000000000 Binary files a/docs/images/form.png and /dev/null differ diff --git a/docs/images/hatchet_login.png b/docs/images/hatchet_login.png deleted file mode 100644 index 42f383b59..000000000 Binary files a/docs/images/hatchet_login.png and /dev/null differ diff --git a/docs/images/hatchet_long_running.png b/docs/images/hatchet_long_running.png deleted file mode 100644 index ee324cfe5..000000000 Binary files a/docs/images/hatchet_long_running.png and /dev/null differ diff --git a/docs/images/hatchet_running.png b/docs/images/hatchet_running.png deleted file mode 100644 index b769a4939..000000000 Binary files a/docs/images/hatchet_running.png and /dev/null differ 
diff --git a/docs/images/hatchet_workflow.png b/docs/images/hatchet_workflow.png deleted file mode 100644 index 24a711871..000000000 Binary files a/docs/images/hatchet_workflow.png and /dev/null differ diff --git a/docs/images/hatchet_workflows.png b/docs/images/hatchet_workflows.png deleted file mode 100644 index a0be6fb09..000000000 Binary files a/docs/images/hatchet_workflows.png and /dev/null differ diff --git a/docs/images/hero-dark.svg b/docs/images/hero-dark.svg deleted file mode 100644 index c6a30e88b..000000000 --- a/docs/images/hero-dark.svg +++ /dev/null @@ -1,161 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/images/hero-light.svg b/docs/images/hero-light.svg deleted file mode 100644 index 297d68fb9..000000000 --- a/docs/images/hero-light.svg +++ /dev/null @@ -1,155 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/images/kg_extraction_progress.png b/docs/images/kg_extraction_progress.png deleted file mode 100644 index d04268ec4..000000000 Binary files a/docs/images/kg_extraction_progress.png and /dev/null differ diff --git a/docs/images/kg_ingestion_status.png b/docs/images/kg_ingestion_status.png deleted file mode 100644 index b18f6cebb..000000000 Binary files a/docs/images/kg_ingestion_status.png and /dev/null differ diff --git a/docs/images/login.png b/docs/images/login.png deleted file mode 100644 index 0848e2bce..000000000 Binary files a/docs/images/login.png and /dev/null differ diff --git a/docs/images/logs.png b/docs/images/logs.png deleted file mode 100644 index 7c9ba48a5..000000000 Binary files a/docs/images/logs.png and /dev/null differ diff --git a/docs/images/neo4j_create_project.png b/docs/images/neo4j_create_project.png deleted file mode 100644 index 031969421..000000000 Binary files a/docs/images/neo4j_create_project.png and /dev/null differ diff --git a/docs/images/neo4j_create_project_2.png b/docs/images/neo4j_create_project_2.png deleted file mode 100644 index aa9aa60c3..000000000 Binary files a/docs/images/neo4j_create_project_2.png and /dev/null differ diff --git a/docs/images/neo4j_download.png b/docs/images/neo4j_download.png deleted file mode 100644 index 6119b89b9..000000000 Binary files a/docs/images/neo4j_download.png and /dev/null differ diff --git a/docs/images/neo4j_local_dbms.png b/docs/images/neo4j_local_dbms.png deleted file mode 100644 index 1fa7aba56..000000000 Binary files a/docs/images/neo4j_local_dbms.png and /dev/null differ diff --git a/docs/images/neo4j_plugins_page.png b/docs/images/neo4j_plugins_page.png deleted file mode 100644 index b3e120159..000000000 Binary files a/docs/images/neo4j_plugins_page.png and /dev/null differ diff --git a/docs/images/neo4j_project_page.png b/docs/images/neo4j_project_page.png deleted file mode 100644 index beb856c01..000000000 Binary files a/docs/images/neo4j_project_page.png and /dev/null differ diff --git a/docs/images/oss_collections_page.png b/docs/images/oss_collections_page.png deleted file mode 100644 index 
8a94c028c..000000000 Binary files a/docs/images/oss_collections_page.png and /dev/null differ diff --git a/docs/images/oss_dashboard_analytics.png b/docs/images/oss_dashboard_analytics.png deleted file mode 100644 index 9ece7dc73..000000000 Binary files a/docs/images/oss_dashboard_analytics.png and /dev/null differ diff --git a/docs/images/oss_dashboard_documents.png b/docs/images/oss_dashboard_documents.png deleted file mode 100644 index 073eaaa38..000000000 Binary files a/docs/images/oss_dashboard_documents.png and /dev/null differ diff --git a/docs/images/overview.png b/docs/images/overview.png deleted file mode 100644 index 094f3d545..000000000 Binary files a/docs/images/overview.png and /dev/null differ diff --git a/docs/images/playground.png b/docs/images/playground.png deleted file mode 100644 index 7e16e8e49..000000000 Binary files a/docs/images/playground.png and /dev/null differ diff --git a/docs/images/quickstart.gif b/docs/images/quickstart.gif deleted file mode 100644 index 16e44b5c7..000000000 Binary files a/docs/images/quickstart.gif and /dev/null differ diff --git a/docs/images/r2r.png b/docs/images/r2r.png deleted file mode 100644 index 2bfda75f9..000000000 Binary files a/docs/images/r2r.png and /dev/null differ diff --git a/docs/images/r2r_arch.png b/docs/images/r2r_arch.png deleted file mode 100644 index 4fdb0d93e..000000000 Binary files a/docs/images/r2r_arch.png and /dev/null differ diff --git a/docs/images/r2r_webdev_template.png b/docs/images/r2r_webdev_template.png deleted file mode 100644 index 5ecacada0..000000000 Binary files a/docs/images/r2r_webdev_template.png and /dev/null differ diff --git a/docs/images/settings_config.png b/docs/images/settings_config.png deleted file mode 100644 index a576e17e9..000000000 Binary files a/docs/images/settings_config.png and /dev/null differ diff --git a/docs/images/settings_prompts.png b/docs/images/settings_prompts.png deleted file mode 100644 index 3316cb22e..000000000 Binary files a/docs/images/settings_prompts.png and /dev/null differ diff --git a/docs/images/sf_graph.png b/docs/images/sf_graph.png deleted file mode 100644 index da3cc1489..000000000 Binary files a/docs/images/sf_graph.png and /dev/null differ diff --git a/docs/images/simple_graph.png b/docs/images/simple_graph.png deleted file mode 100644 index 6d58eb1b8..000000000 Binary files a/docs/images/simple_graph.png and /dev/null differ diff --git a/docs/images/ssh_print.png b/docs/images/ssh_print.png deleted file mode 100644 index 246825abf..000000000 Binary files a/docs/images/ssh_print.png and /dev/null differ diff --git a/docs/images/users.png b/docs/images/users.png deleted file mode 100644 index b910af67a..000000000 Binary files a/docs/images/users.png and /dev/null differ diff --git a/docs/images/vecdb.png b/docs/images/vecdb.png deleted file mode 100644 index c31a3c8ef..000000000 Binary files a/docs/images/vecdb.png and /dev/null differ diff --git a/docs/images/watch.png b/docs/images/watch.png deleted file mode 100644 index bc37f729f..000000000 Binary files a/docs/images/watch.png and /dev/null differ diff --git a/docs/images/yc_s24.png b/docs/images/yc_s24.png deleted file mode 100644 index 65fa1a952..000000000 Binary files a/docs/images/yc_s24.png and /dev/null differ diff --git a/docs/inkeep.js b/docs/inkeep.js deleted file mode 100644 index 74982556f..000000000 --- a/docs/inkeep.js +++ /dev/null @@ -1,117 +0,0 @@ -// customize -const inkeepSettings = { - isOpen: true, - baseSettings: { - apiKey: '3adc15f45617826bb73c0a4cb9fb3a06547b6ab7f90c819e', - 
integrationId: 'clv1yuc8f0002vzhxbzwy26yx', - organizationId: 'clu6yudh9000sajgn7nwxfve8', - primaryBrandColor: '#26D6FF', // your brand color, widget color scheme is derived from this - organizationDisplayName: 'R2R', - theme: { - colorMode: { - forcedColorMode: 'dark', // to sync dark mode with the widget - }, - }, - }, - aiChatSettings: { - // ...optional settings - botAvatarSrcUrl: - "https://www.sciphi.ai/screenshots/logo222_cut.png", - quickQuestions: [ - "How do I get started?", - "How does R2R implement hybrid search?", - "How do I use the R2R API?", - // "How do I use ", - // "Example question 3?", - ], - }, - modalSettings: { - isShortcutKeyEnabled: false, // disable default cmd+k behavior - // ...optional settings - }, - }; - - // The Mintlify search triggers, which we'll reuse to trigger the Inkeep modal - const searchButtonContainerIds = [ - "search-bar-entry", - "search-bar-entry-mobile", - ]; - - // Clone and replace, needed to remove existing event listeners - const clonedSearchButtonContainers = searchButtonContainerIds.map((id) => { - const originalElement = document.getElementById(id); - const clonedElement = originalElement.cloneNode(true); - originalElement.parentNode.replaceChild(clonedElement, originalElement); - - return clonedElement; - }); - - // Load the Inkeep component library - const inkeepScript = document.createElement("script"); - inkeepScript.type = "module"; - inkeepScript.src = - "https://unpkg.com/@inkeep/widgets-embed@latest/dist/embed.js"; - document.body.appendChild(inkeepScript); - - // Once the Inkeep library is loaded, instantiate the UI components - inkeepScript.addEventListener("load", function () { - // Customization settings - - // for syncing with dark mode - const colorModeSettings = { - observedElement: document.documentElement, - isDarkModeCallback: (el) => { - return el.classList.contains("dark"); - }, - colorModeAttribute: "class", - }; - - // Instantiate the "Ask AI" pill chat button - Inkeep().embed({ - componentType: "ChatButton", - colorModeSync: colorModeSettings, - properties: inkeepSettings, - }); - - // Instantiate the search bar modal - const inkeepSearchModal = Inkeep({ - ...inkeepSettings.baseSettings, - }).embed({ - componentType: "CustomTrigger", - colorModeSync: colorModeSettings, - properties: { - ...inkeepSettings, - isOpen: false, - onClose: () => { - inkeepSearchModal.render({ - isOpen: false, - }); - }, - }, - }); - - // When the Mintlify search bar elements are clicked, open the Inkeep search modal - clonedSearchButtonContainers.forEach((trigger) => { - trigger.addEventListener("click", function () { - inkeepSearchModal.render({ - isOpen: true, - }); - }); - }); - - // Open the Inkeep Modal with cmd+k - window.addEventListener( - "keydown", - (event) => { - if ( - (event.metaKey || event.ctrlKey) && - (event.key === "k" || event.key === "K") - ) { - event.stopPropagation(); - inkeepSearchModal.render({ isOpen: true }); - return false; - } - }, - true - ); - }); diff --git a/docs/introduction.mdx b/docs/introduction.mdx deleted file mode 100644 index e16fb26dc..000000000 --- a/docs/introduction.mdx +++ /dev/null @@ -1,59 +0,0 @@ ---- -title: 'Introduction' -description: 'Build, scale, and manage user-facing Retrieval-Augmented Generation applications.' 
-icon: 'flag-checkered'
----
-
-import GithubButtons from '../components/GithubButtons';
-
-
-![r2r](./images/r2r.png)
-
-R2R (RAG to Riches), the Elasticsearch for RAG, bridges the gap between experimenting with and deploying state-of-the-art Retrieval-Augmented Generation (RAG) applications. It's a complete platform that helps you quickly build and launch scalable RAG solutions. Built around a containerized [RESTful API](/api-reference/introduction), R2R offers multimodal ingestion support, hybrid search, GraphRAG, user & document management, and observability / analytics features.
-
-## Key Features
-- [**📁 Multimodal Ingestion**](/documentation/configuration/ingestion/overview): Parse `.txt`, `.pdf`, `.json`, `.png`, `.mp3`, and more.
-- [**🔍 Hybrid Search**](/cookbooks/hybrid-search): Combine semantic and keyword search with reciprocal rank fusion for enhanced relevancy.
-- [**🔗 Graph RAG**](/cookbooks/graphrag): Automatically extract relationships and build knowledge graphs.
-- [**🗂️ App Management**](/cookbooks/user-auth): Efficiently manage documents and users with full authentication.
-- [**🔭 Observability**](/cookbooks/observability): Observe and analyze your RAG engine performance.
-- [**🧩 Configurable**](/documentation/configuration/introduction): Provision your application using intuitive configuration files.
-- [**🖥️ Dashboard**](https://github.com/SciPhi-AI/R2R-Dashboard): An open-source React+Next.js app with optional authentication for interacting with R2R via a GUI.
-
-## Getting Started
-
-- [Installation](/documentation/installation): Quick installation of R2R using Docker or pip
-- [Quickstart](/documentation/quickstart): A quick introduction to R2R's core features
-- [Setup](/documentation/configuration/introduction): Learn how to set up and configure R2R
-
-## API & SDKs
-
-- [SDK](/documentation/python-sdk): Python and JS SDK reference for interacting with R2R
-- [API](/api-reference/introduction): REST API reference for interacting with R2R
-- [Configuration](/documentation/configuration): A guide on how to configure your R2R system
-- [SciPhi Website](https://sciphi.ai/): Explore a managed AI solution powered by R2R.
-- [Contact Us](mailto:founders@sciphi.ai): Get in touch with our team to discuss your specific needs.
-
-## Cookbooks
-
-- Advanced RAG Pipelines
-  - [RAG Agent](/cookbooks/agent): R2R's powerful RAG agent
-  - [Hybrid Search](/cookbooks/hybrid-search): Introduction to hybrid search
-  - [Advanced RAG](/cookbooks/advanced-rag): Advanced RAG features
-
-- Knowledge Graphs
-  - [GraphRAG](/cookbooks/graphrag): Walkthrough of GraphRAG
-
-- Orchestration
-  - [Orchestration](/cookbooks/orchestration): R2R event orchestration
-
-- Auth & Admin Features
-  - [Web Development](/cookbooks/web-dev): Building webapps using R2R
-  - [User Auth](/cookbooks/user-auth): Authenticating users
-  - [Collections](/cookbooks/collections): Document collections
-  - [Analytics & Observability](/cookbooks/observability): End-to-end logging and analytics
-  - [Web Application](/cookbooks/application): Connecting with the R2R Application
-
-## Community
-
-[Join our Discord server](https://discord.gg/p6KqD2kjtB) to get support and connect with both the R2R team and other developers in the community. Whether you're encountering issues, looking for advice on best practices, or just want to share your experiences, we're here to help.
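Taken together, the SDK and configuration pages referenced above boil down to a short end-to-end flow. The following is a minimal sketch, assuming a local R2R instance on the default port; the `R2RClient` constructor and the `ingest_files` helper are assumptions about this SDK version, while the `client.rag` keyword arguments mirror the retrieval and generation settings shown in the RAG guide below.

```python
# End-to-end sketch: ingest a file, then ask a question over it with hybrid
# search. R2RClient and ingest_files are assumptions about this SDK version;
# the rag() keyword arguments follow the RAG configuration snippets below.
from r2r import R2RClient

client = R2RClient("http://localhost:7272")  # default local R2R address

# Ingest a local document so there is something to retrieve against.
client.ingest_files(file_paths=["./my_report.pdf"])  # assumed helper name

# Hybrid (semantic + keyword) retrieval feeding generation.
response = client.rag(
    query="What are the key findings in my report?",
    vector_search_settings={"use_hybrid_search": True, "top_k": 5},
    rag_generation_config={"temperature": 0.7, "model": "openai/gpt-4"},
)
print(response)
```

The same flow is available over the REST API and the JS SDK for non-Python stacks.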
diff --git a/docs/introduction/how-r2r-works.mdx b/docs/introduction/how-r2r-works.mdx deleted file mode 100644 index aef552819..000000000 --- a/docs/introduction/how-r2r-works.mdx +++ /dev/null @@ -1,161 +0,0 @@ ---- -title: "How R2R works" -icon: 'gear' ---- - -**On this page** -1. Core Architecture -2. Document Processing Pipeline -3. Search and Retrieval System -4. Response Generation -5. System Components - -## Core Architecture - -R2R operates as a distributed system with several key components: - -**API Layer** -- RESTful API for all operations -- Authentication and access control -- Request routing and validation - -**Storage Layer** -- Document storage -- Vector embeddings -- User and permission data -- Knowledge graphs - -**Processing Pipeline** -- Document parsing -- Chunking and embedding -- Relationship extraction -- Task orchestration - -## Document Processing Pipeline - -When you ingest a document into R2R: - -1. **Document Parsing** - - Files are processed based on type (PDF, text, images, etc.) - - Text is extracted and cleaned - - Metadata is preserved - -2. **Chunking** - - Documents are split into semantic units - - Chunk size and overlap are configurable - - Headers and structure are maintained - -3. **Embedding Generation** - - Each chunk is converted to a vector embedding - - Multiple embedding models supported - - Embeddings are optimized for search - -4. **Knowledge Graph Creation** - - Relationships between chunks are identified - - Entities are extracted and linked - - Graph structure is built and maintained - -## Search and Retrieval System - -R2R uses a sophisticated search system: - -**Vector Search** -- High-dimensional vector similarity search -- Optimized indices for fast retrieval -- Configurable distance metrics - -**Hybrid Search** -``` -Query → [Vector Search Branch] → Semantic Results - → [Keyword Search Branch] → Lexical Results - → [Fusion Layer] → Final Ranked Results -``` - -**Ranking** -- Reciprocal rank fusion -- Configurable weights -- Result deduplication - -## Response Generation - -When generating responses: - -1. **Context Building** - - Relevant chunks are retrieved - - Context is formatted for the LLM - - Citations are prepared - -2. **LLM Integration** - - Context is combined with the query - - System prompts guide response format - - Streaming support for real-time responses - -3. **Post-processing** - - Response validation - - Citation linking - - Format cleaning - -## System Components - -R2R consists of several integrated services: - -**Core Services** -``` -┌─────────────┐ ┌─────────────┐ ┌─────────────┐ -│ API │ ↔ │ Processor │ ↔ │ Storage │ -└─────────────┘ └─────────────┘ └─────────────┘ - ↕ ↕ ↕ -┌─────────────┐ ┌─────────────┐ ┌─────────────┐ -│ Auth Server │ ↔ │ Orchestrator│ ↔ │ Search │ -└─────────────┘ └─────────────┘ └─────────────┘ -``` - -**Database Layer** -- PostgreSQL for structured data -- pgvector for vector storage -- Graph data for relationships - -**External Integrations** -- LLM providers (OpenAI, Anthropic, etc.) -- Authentication providers -- Storage systems - -## Performance Considerations - -R2R optimizes for several key metrics: - -**Latency** -- Cached embeddings -- Optimized vector indices -- Request batching - -**Scalability** -- Horizontal scaling support -- Distributed processing -- Load balancing - -**Reliability** -- Task queuing -- Error handling -- Automatic retries - -## Resource Management - -R2R efficiently manages system resources: - -1. 
**Memory Usage** - - Vector index optimization - - Chunk size management - - Cache control - -2. **Processing Power** - - Parallel processing - - Batch operations - - Priority queuing - -3. **Storage** - - Efficient vector storage - - Document versioning - - Metadata indexing - -For detailed deployment configurations and optimization strategies, refer to our [Configuration Guide](/documentation/configuration). diff --git a/docs/introduction/rag.mdx b/docs/introduction/rag.mdx deleted file mode 100644 index f17ae4966..000000000 --- a/docs/introduction/rag.mdx +++ /dev/null @@ -1,128 +0,0 @@ ---- -title: "More about RAG" -icon: 'brain' ---- - -**On this page** -1. Before you begin -2. What is RAG? -3. Set up RAG with R2R -4. Configure RAG settings -5. How RAG works in R2R - -RAG (Retrieval-Augmented Generation) combines the power of large language models with precise information retrieval from your own documents. When users ask questions, RAG first retrieves relevant information from your document collection, then uses this context to generate accurate, contextual responses. This ensures AI responses are both relevant and grounded in your specific knowledge base. - -**Before you begin** - -RAG in R2R has the following requirements: -* A running R2R instance (local or deployed) -* Access to an LLM provider (OpenAI, Anthropic, or local models) -* Documents ingested into your R2R system -* Basic configuration for document processing and embedding generation - -## What is RAG? - -RAG operates in three main steps: -1. **Retrieval**: Finding relevant information from your documents -2. **Augmentation**: Adding this information as context for the AI -3. **Generation**: Creating responses using both the context and the AI's knowledge - -Benefits over traditional LLM applications: -* More accurate responses based on your specific documents -* Reduced hallucination by grounding answers in real content -* Ability to work with proprietary or recent information -* Better control over AI outputs - -## Set up RAG with R2R - -To start using RAG in R2R: - -1. Install and start R2R: -```bash -pip install r2r -r2r serve --docker -``` - -2. Ingest your documents: -```bash -r2r ingest-files --file-paths /path/to/your/documents -``` - -3. Test basic RAG functionality: -```bash -r2r rag --query="your question here" -``` - -## Configure RAG settings - -R2R offers several ways to customize RAG behavior: - -1. **Retrieval Settings**: -```python -# Using hybrid search (combines semantic and keyword search) -client.rag( - query="your question", - vector_search_settings={"use_hybrid_search": True} -) - -# Adjusting number of retrieved chunks -client.rag( - query="your question", - vector_search_settings={"top_k": 5} -) -``` - -2. 
**Generation Settings**: -```python -# Adjusting response style -client.rag( - query="your question", - rag_generation_config={ - "temperature": 0.7, - "model": "openai/gpt-4" - } -) -``` - -## How RAG works in R2R - -R2R's RAG implementation uses a sophisticated pipeline: - -**Document Processing** -* Documents are split into semantic chunks -* Each chunk is embedded using AI models -* Chunks are stored with metadata and relationships - -**Retrieval Process** -* Queries are processed using hybrid search -* Both semantic similarity and keyword matching are considered -* Results are ranked by relevance scores - -**Response Generation** -* Retrieved chunks are formatted as context -* The LLM generates responses using this context -* Citations and references can be included - -**Advanced Features** -* GraphRAG for relationship-aware responses -* Multi-step RAG for complex queries -* Agent-based RAG for interactive conversations - -## Best Practices - -1. **Document Processing** - * Use appropriate chunk sizes (256-1024 tokens) - * Maintain document metadata - * Consider document relationships - -2. **Query Optimization** - * Use hybrid search for better retrieval - * Adjust relevance thresholds - * Monitor and analyze search performance - -3. **Response Generation** - * Balance temperature for creativity vs accuracy - * Use system prompts for consistent formatting - * Implement error handling and fallbacks - -For more detailed information, visit our [RAG Configuration Guide](/documentation/configuration/rag) or try our [Quickstart](/documentation/quickstart). diff --git a/docs/introduction/system.mdx b/docs/introduction/system.mdx deleted file mode 100644 index 3d0684921..000000000 --- a/docs/introduction/system.mdx +++ /dev/null @@ -1,93 +0,0 @@ ---- -title: 'System' -icon: 'server' -description: 'Learn about the R2R system architecture' ---- - -## System Diagram - - -```mermaid -graph TD - U((User)) -->|Query| GW[Traefik Gateway] - GW -->|Route| API[R2R API Cluster] - API -->|Authenticate| AS[Auth Service] - - R2R[R2R Application] -->|Use| API - - subgraph "Core Services" - AS - ReS[Retrieval Service] - IS[Ingestion Service] - GBS[Graph Builder Service] - AMS[App Management Service] - end - - subgraph "Providers" - EP[Embedding Provider] - LP[LLM Provider] - AP[Auth Provider] - IP[Ingestion Provider] - end - - IS & GBS & ReS <-->|Coordinate| O - - ReS -->|Use| EP - ReS -->|Use| LP - IS -->|Use| EP - IS -->|Use| IP - GBS -->|Use| LP - AS -->|Use| AP - - subgraph "Orchestration" - O[Orchestrator] - RMQ[RabbitMQ] - O <-->|Use| RMQ - end - - subgraph "Storage" - PG[(Postgres + pgvector)] - FS[File Storage] - end - - AS & AMS & ReS -->|Use| PG - GBS & ReS -->|Use| Neo - IS -->|Use| FS - - classDef gateway fill:#2b2b2b,stroke:#ffffff,stroke-width:2px; - classDef api fill:#4444ff,stroke:#ffffff,stroke-width:2px; - classDef orchestrator fill:#007acc,stroke:#ffffff,stroke-width:2px; - classDef messagequeue fill:#2ca02c,stroke:#ffffff,stroke-width:2px; - classDef storage fill:#336791,stroke:#ffffff,stroke-width:2px; - classDef providers fill:#ff7f0e,stroke:#ffffff,stroke-width:2px; - classDef auth fill:#ff0000,stroke:#ffffff,stroke-width:2px; - classDef application fill:#9932cc,stroke:#ffffff,stroke-width:2px; - class GW gateway; - class API api; - class O orchestrator; - class RMQ messagequeue; - class PG,Neo,FS storage; - class EP,LP,AP,IP providers; - class AS auth; - class R2R application; -``` - -## System Overview - -R2R is built on a modular, service-oriented architecture designed for 
scalability and flexibility: - -1. **API Layer**: A FastAPI-based cluster handles incoming requests, routing them to appropriate services. - -2. **Core Services**: Specialized services for authentication, retrieval, ingestion, graph building, and app management. - -3. **Orchestration**: Manages complex workflows and long-running tasks using a message queue system. - -4. **Storage**: Utilizes PostgreSQL with pgvector for vector storage and search, and graph search. - -5. **Providers**: Pluggable components for embedding, LLM, auth, and ingestion services, supporting multimodal ingestion and flexible model integration. - -6. **R2R Application**: A React+Next.js app providing a user interface for interacting with the R2R system. - -This architecture enables R2R to handle everything from simple RAG applications to complex, production-grade systems with advanced features like hybrid search and GraphRAG. - -Ready to get started? Check out our [Docker installation guide](/documentation/installation/full/docker) and [Quickstart tutorial](/documentation/quickstart) to begin your R2R journey. diff --git a/docs/introduction/what-is-r2r.mdx b/docs/introduction/what-is-r2r.mdx deleted file mode 100644 index 6b692e6fa..000000000 --- a/docs/introduction/what-is-r2r.mdx +++ /dev/null @@ -1,68 +0,0 @@ ---- -title: "What is R2R?" -icon: 'bolt' ---- - - - -**On this page** -1. What does R2R do? -2. What can R2R do for my applications? -3. What can R2R do for my developers? -4. What can R2R do for my business? -5. Getting started - -Companies like OpenAI, Anthropic, and Google have shown the incredible potential of AI for understanding and generating human language. But building reliable AI applications that can work with your organization's specific knowledge and documents requires significant expertise and infrastructure. Your company isn't an AI infrastructure company: **it doesn't make sense for you to build a complete RAG system from scratch.** - -R2R (RAG to Riches) provides the infrastructure and tools to help you implement **efficient, scalable, and reliable AI-powered document understanding** in your applications. - -## What does R2R do? - -R2R consists of three main components: **document processing**, **AI-powered search**, and **analytics**. The document processing and search capabilities make it easier for your developers to create intelligent applications that can understand and work with your organization's knowledge. The analytics tools enable your teams to monitor performance, understand usage patterns, and continuously improve the system. - -## What can R2R do for my applications? - -R2R provides your applications with production-ready RAG capabilities: -- Fast and accurate document search using both semantic and keyword matching -- Intelligent document processing that works with PDFs, images, audio, and more -- Automatic relationship extraction to build knowledge graphs -- Built-in user management and access controls -- Simple integration through REST APIs and SDKs - -## What can R2R do for my developers? 
- -R2R provides a complete toolkit that simplifies building AI-powered applications: -- **Ready-to-use Docker deployment** for quick setup and testing -- **Python and JavaScript SDKs** for easy integration -- **RESTful API** for language-agnostic access -- **Built-in orchestration** for handling large-scale document processing -- **Flexible configuration** through intuitive config files -- **Comprehensive documentation** and examples -- **Local deployment option** for working with sensitive data - -## What can R2R do for my business? - -R2R provides the infrastructure to build AI applications that can: -- **Make your documents searchable** with state-of-the-art AI -- **Answer questions** using your organization's knowledge -- **Process and understand** documents at scale -- **Secure sensitive information** through built-in access controls -- **Monitor usage and performance** through analytics -- **Scale efficiently** as your needs grow - -## Getting Started - -The fastest way to start with R2R is through Docker: -```bash -pip install r2r -r2r serve --docker -``` - -This gives you a complete RAG system running at http://localhost:7272 with: -- Document processing pipeline -- Vector search capabilities -- GraphRAG features -- User management -- Analytics dashboard - -Visit our [Quickstart Guide](/documentation/quickstart) to begin building with R2R. diff --git a/docs/introduction/whats-new.mdx b/docs/introduction/whats-new.mdx deleted file mode 100644 index ab8e6e0a8..000000000 --- a/docs/introduction/whats-new.mdx +++ /dev/null @@ -1,15 +0,0 @@ ---- -title: "What's New" -description: 'Changelog' -icon: 'bell' ---- - -## Version 0.3.20 — Sep. 6, 2024 - -### New Features -- [R2R Light](https://r2r-docs.sciphi.ai/documentation/installation/light/local-system) installation added -- Removed Neo4j and implemented GraphRAG inside of Postgres -- Improved efficiency and configurability of knowledge graph construction process - -### Bug Fixes -- Minor bug fixes around config logic and other. 
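Before building against the quickstart stack described above, it helps to confirm the server is actually answering on http://localhost:7272. Below is a small readiness check followed by a first query; the `R2RClient` name is an assumption about this SDK version, and the check deliberately treats any HTTP response as "server is up" rather than relying on a specific health route.

```python
# Readiness check for a local `r2r serve --docker` stack, then a first query.
# Assumptions: the SDK client is named R2RClient and the server listens on the
# documented port; any HTTP response (even 404) counts as "reachable".
import time
import urllib.error
import urllib.request

from r2r import R2RClient  # assumed client name for this SDK version

BASE_URL = "http://localhost:7272"


def wait_for_r2r(url: str, retries: int = 30, delay: float = 2.0) -> None:
    """Poll the server until it responds or the retry budget is exhausted."""
    for _ in range(retries):
        try:
            urllib.request.urlopen(url, timeout=2)
            return
        except urllib.error.HTTPError:
            return  # the server answered; this path just isn't routable
        except (urllib.error.URLError, OSError):
            time.sleep(delay)
    raise RuntimeError(f"R2R did not become reachable at {url}")


wait_for_r2r(BASE_URL)
client = R2RClient(BASE_URL)
print(client.rag(query="Summarize the documents I have ingested so far."))
```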
diff --git a/docs/logo/sciphi_dark.png b/docs/logo/sciphi_dark.png deleted file mode 100644 index 6e368c7dc..000000000 Binary files a/docs/logo/sciphi_dark.png and /dev/null differ diff --git a/docs/logo/sciphi_light.png b/docs/logo/sciphi_light.png deleted file mode 100644 index 629684854..000000000 Binary files a/docs/logo/sciphi_light.png and /dev/null differ diff --git a/docs/mint.json b/docs/mint.json deleted file mode 100644 index c40bce1ee..000000000 --- a/docs/mint.json +++ /dev/null @@ -1,454 +0,0 @@ -{ - "$schema": "https://mintlify.com/schema.json", - "name": "Build, scale, and manage user-facing Retrieval-Augmented Generation applications.", - "logo": { - "dark": "/logo/sciphi_dark.png", - "light": "/logo/sciphi_light.png" - }, - "redirects": [ - { - "source": "/api-reference", - "destination": "/api-reference/introduction" - } - ], - "analytics": { - "posthog": { - "apiKey": "phc_OPBbibOIErCGc4NDLQsOrMuYFTKDmRwXX6qxnTr6zpU" - } - }, - "feedback": {"thumbsRating": true, "suggestEdit": true}, - "favicon": "/favicon.png", - "colors": { - "primary": "#3f54be", - "light": "#a0aadf", - "dark": "#a0aadf", - "anchors": { - "from": "#0D9373", - "to": "#07C983" - } - }, - "topbarLinks": [ - { - "name": "Support", - "url": "mailto:founders@sciphi.ai" - } - ], - "topbarCtaButton": { - "name": "R2R GitHub", - "url": "https://github.com/SciPhi-AI/R2R" - }, - "primaryTab": { - "name": "Introduction" - }, - "tabs": [ - { - "name": "Documentation", - "url": "documentation" - }, - { - "name": "API Reference", - "url": "api-reference" - }, - { - "name": "Cookbooks", - "url": "cookbooks" - } - ], - "anchors": [ - { - "name": "Community", - "icon": "discord", - "url": "https://discord.gg/p6KqD2kjtB" - } - ], - "navigation": [ - { - "group": "", - "pages": [ - "introduction", - "introduction/system", - "introduction/whats-new", - { - "group": "Guides", - "pages": [ - "introduction/what-is-r2r", - "introduction/rag", - "introduction/how-r2r-works" - ] - } - ] - }, - { - "group": "Getting Started", - "pages": [ - { - "group": "Installation", - "icon": "bars-progress", - "pages" : [ - "documentation/installation/overview", - { - "group": "Light", - "icon": "feather", - "pages": [ - "documentation/installation/light/local-system", - "documentation/installation/light/docker" - ] - }, - { - "group": "Full", - "icon": "gem", - "pages": [ - "documentation/installation/full/docker", - "documentation/installation/full/local-system" - ] - } - - ] - }, - "documentation/quickstart", - { - "group": "API & SDKs", - "icon": "code", - "pages" : [ - { - "icon": "python", - "group": "Python SDK", - "pages": [ - "documentation/python-sdk/introduction", - "documentation/python-sdk/ingestion", - "documentation/python-sdk/graphrag", - "documentation/python-sdk/retrieval", - "documentation/python-sdk/auth", - "documentation/python-sdk/collections", - "documentation/python-sdk/observability", - "documentation/python-sdk/conversations" - ] - }, - { - "icon": "js", - "group": "JS SDK", - "pages": [ - "documentation/js-sdk/introduction", - "documentation/js-sdk/ingestion", - "documentation/js-sdk/retrieval", - "documentation/js-sdk/auth", - "documentation/js-sdk/collections", - "documentation/js-sdk/observability", - "documentation/js-sdk/conversations" - ] - }, - { - "icon": "desktop", - "group": "CLI", - "pages": [ - "documentation/cli/introduction", - "documentation/cli/server", - "documentation/cli/ingestion", - "documentation/cli/retrieval", - "documentation/cli/graph", - "documentation/cli/management", - 
"documentation/cli/maintenance" - ] - }, - - "documentation/api-reference" - ] - }, - "documentation/glossary" - ] - }, - { - "group": "Setup", - "pages": [ - { - "group": "Core Services", - "icon": "circle-exclamation", - "pages": [ - "documentation/configuration/introduction", - "documentation/configuration/postgres", - "documentation/configuration/llm", - "documentation/configuration/rag", - "documentation/configuration/prompts" - ] - }, - { - "group": "Ingestion", - "icon": "upload", - "pages": [ - "documentation/configuration/ingestion/overview", - "documentation/configuration/ingestion/parsing_and_chunking", - "documentation/configuration/ingestion/embedding" - ] - }, - { - "group": "Knowledge Graph", - "icon": "diagram-project", - "pages": [ - "documentation/configuration/knowledge-graph/overview", - "documentation/configuration/knowledge-graph/enrichment" - ] - }, - { - "group": "Retrieval", - "icon": "magnifying-glass", - "pages": [ - "documentation/configuration/retrieval/overview", - "documentation/configuration/retrieval/vector-search", - "documentation/configuration/retrieval/knowledge-graph" - - ] - }, - "documentation/local-rag" - ] - }, - { - "group": "Deployment", - "pages": [ - "documentation/deployment/introduction", - { - "group": "Cloud Providers", - "icon": "cloud", - "pages": [ - "documentation/deployment/azure", - "documentation/deployment/aws", - "documentation/deployment/gcp", - "documentation/deployment/sciphi" - ] - }, - { - "group": "Troubleshooting", - "icon": "wrench", - "pages": [ - "documentation/deployment/troubleshooting/index", - { - "group": "Common Installation Issues", - "pages": [ - "documentation/deployment/troubleshooting/docker", - "documentation/deployment/troubleshooting/environment", - "documentation/deployment/troubleshooting/resources", - "documentation/deployment/troubleshooting/dependencies", - "documentation/deployment/troubleshooting/services", - "documentation/deployment/troubleshooting/unstructured" - ] - }, - { - "group": "Deployment Problems", - "pages": [ - "documentation/deployment/troubleshooting/timeouts", - "documentation/deployment/troubleshooting/insufficient_resources", - "documentation/deployment/troubleshooting/firewall", - "documentation/deployment/troubleshooting/port_conflicts", - "documentation/deployment/troubleshooting/database", - "documentation/deployment/troubleshooting/local_llm" - ] - }, - { - "group": "Runtime Errors", - "pages": [ - "documentation/deployment/troubleshooting/api_connections", - "documentation/deployment/troubleshooting/api_responses", - "documentation/deployment/troubleshooting/slow_queries", - "documentation/deployment/troubleshooting/high_usage" - ] - }, - { - "group": "Configuration Troubleshooting", - "pages": [ - "documentation/deployment/troubleshooting/missing_keys", - "documentation/deployment/troubleshooting/incorrect_credentials", - "documentation/deployment/troubleshooting/toml_errors", - "documentation/deployment/troubleshooting/bad_configuration" - ] - }, - { - "group": "Component-specific Issues", - "pages": [ - "documentation/deployment/troubleshooting/vector_store_issues", - "documentation/deployment/troubleshooting/connection_strings", - "documentation/deployment/troubleshooting/workflows" - ] - }, - { - "group": "R2R Problems", - "pages": [ - "documentation/deployment/troubleshooting/r2r_logs", - "documentation/deployment/troubleshooting/r2r_health" - ] - } - ] - } - ] - }, - { - "group": "Deep Dives", - "pages":[ - { - "group": "R2R Main", - "pages": [ - 
"documentation/deep-dive/main/introduction", - "documentation/deep-dive/main/config", - "documentation/deep-dive/main/builder" - ] - }, - { - "group": "Providers", - "pages": [ - "documentation/deep-dive/providers/auth", - "documentation/deep-dive/providers/llms", - "documentation/deep-dive/providers/embeddings", - "documentation/deep-dive/providers/database", - "documentation/deep-dive/providers/knowledge-graph" - ] - }, - { - "group": "Other", - "pages": [ - "documentation/deep-dive/other/telemetry" - ] - } - ] - }, - { - "group": "API Documentation", - "pages": [ - "api-reference/introduction" - ] - }, - { - "group": "Document Ingestion", - "pages": [ - "api-reference/endpoint/ingest_files", - "api-reference/endpoint/ingest_chunks", - "api-reference/endpoint/update_files", - "api-reference/endpoint/create_vector_index", - "api-reference/endpoint/delete_vector_index", - "api-reference/endpoint/list_vector_indices" - ] - }, - { - "group": "Knowledge Graph", - "pages": [ - "api-reference/endpoint/create_graph", - "api-reference/endpoint/deduplicate_entities", - "api-reference/endpoint/enrich_graph", - "api-reference/endpoint/entities", - "api-reference/endpoint/triples", - "api-reference/endpoint/communities", - "api-reference/endpoint/delete_entities_and_triples" - ] - }, - { - "group": "Retrieval", - "pages": [ - "api-reference/endpoint/search", - "api-reference/endpoint/rag", - "api-reference/endpoint/agent", - "api-reference/endpoint/completion" - ] - }, - { - "group": "User Documents", - "pages": [ - "api-reference/endpoint/document_chunks", - "api-reference/endpoint/delete", - "api-reference/endpoint/documents_overview" - ] - }, - { - "group": "User Auth", - "pages": [ - "api-reference/endpoint/register", - "api-reference/endpoint/verify_email", - "api-reference/endpoint/login", - "api-reference/endpoint/get_user_info", - "api-reference/endpoint/put_user_info", - "api-reference/endpoint/delete_user_info", - "api-reference/endpoint/refresh_access_token", - "api-reference/endpoint/logout", - "api-reference/endpoint/change_password", - "api-reference/endpoint/request_password_reset" - ] - }, - { - "group": "Collections", - "pages": [ - "api-reference/endpoint/create_collection", - "api-reference/endpoint/get_collection", - "api-reference/endpoint/update_collection", - "api-reference/endpoint/delete_collection", - "api-reference/endpoint/list_collections", - "api-reference/endpoint/add_user_to_collection", - "api-reference/endpoint/remove_user_from_collection", - "api-reference/endpoint/get_users_in_collection", - "api-reference/endpoint/get_collections_for_user", - "api-reference/endpoint/collections_overview", - "api-reference/endpoint/assign_document_to_collection", - "api-reference/endpoint/remove_document_from_collection", - "api-reference/endpoint/get_document_collections" - ] - }, - - { - "group": "Admin", - "pages": [ - "api-reference/endpoint/update_prompt", - "api-reference/endpoint/add_prompt", - "api-reference/endpoint/get_prompt", - "api-reference/endpoint/get_all_prompts", - "api-reference/endpoint/delete_prompt", - "api-reference/endpoint/logs", - "api-reference/endpoint/users_overview", - "api-reference/endpoint/app_settings" - ] - }, - { - "group": "Analytics and Observability", - "pages": [ - "api-reference/endpoint/analytics" - ] - }, - { - "group": "Status", - "pages": [ - "api-reference/endpoint/health" - ] - }, - { - "group": "General", - "pages": [ - "cookbooks/walkthrough", - "cookbooks/ingestion", - "cookbooks/contextual-enrichment", - "cookbooks/hybrid-search", 
- "cookbooks/advanced-rag", - "cookbooks/graphrag", - "cookbooks/advanced-graphrag", - "cookbooks/agent", - "cookbooks/orchestration", - "cookbooks/maintenance", - "cookbooks/web-dev" - ] - }, - { - "group": "Auth & Admin", - "pages": [ - "cookbooks/user-auth", - "cookbooks/collections", - "cookbooks/observability", - "cookbooks/application" - ] - }, - { - "group": "Deployment", - "pages": [ - "cookbooks/remote-cloud" - ] - } - ], - "footerSocials": { - "github": "https://github.com/SciPhi-AI", - "linkedin": "https://www.linkedin.com/company/sciphi-ai", - "discord": "https://discord.gg/p6KqD2kjtB", - "twitter": "https://twitter.com/ocolegro" - } -} diff --git a/docs/snippets/snippet-intro.mdx b/docs/snippets/snippet-intro.mdx deleted file mode 100644 index c57e7c756..000000000 --- a/docs/snippets/snippet-intro.mdx +++ /dev/null @@ -1,4 +0,0 @@ -One of the core principles of software development is DRY (Don't Repeat -Yourself). This is a principle that apply to documentation as -well. If you find yourself repeating the same content in multiple places, you -should consider creating a custom snippet to keep your content in sync. diff --git a/js/sdk/__tests__/ChunksIntegrationSuperUser.test.ts b/js/sdk/__tests__/ChunksIntegrationSuperUser.test.ts new file mode 100644 index 000000000..7bc4efa9e --- /dev/null +++ b/js/sdk/__tests__/ChunksIntegrationSuperUser.test.ts @@ -0,0 +1,105 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +describe("r2rClient V3 Collections Integration Tests", () => { + let client: r2rClient; + let document_id: string; + let chunkId: string; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + await client.users.login({ + email: "admin@example.com", + password: "change_me_immediately", + }); + }); + + test("Create a chunk", async () => { + const response = await client.documents.create({ + chunks: ["Hello, world!"], + runWithOrchestration: false, + }); + + document_id = response.results.document_id; + + expect(response.results).toEqual({ + document_id: expect.any(String), + message: "Document created and ingested successfully.", + task_id: null, + }); + }); + + test("Retrieve document's chunks", async () => { + const response = await client.documents.listChunks({ + id: document_id, + }); + + chunkId = response.results[0]?.id; + + expect(chunkId).toBeDefined(); + expect(response.results[0]).toMatchObject({ + id: expect.any(String), + document_id: expect.any(String), + text: expect.any(String), + collection_ids: expect.any(Array), + metadata: expect.any(Object), + }); + }); + + test("Retrieve a chunk", async () => { + const response = await client.chunks.retrieve({ + id: chunkId, + }); + + expect(response.results).toMatchObject({ + id: expect.any(String), + document_id: expect.any(String), + text: expect.any(String), + collection_ids: expect.any(Array), + metadata: expect.any(Object), + }); + }); + + test("Update a chunk", async () => { + const response = await client.chunks.update({ + id: chunkId, + text: "Hello, world! How are you?", + }); + + expect(response.results).toMatchObject({ + id: expect.any(String), + document_id: expect.any(String), + text: "Hello, world! 
How are you?", + collection_ids: expect.any(Array), + metadata: expect.any(Object), + }); + }); + + test("Retrieve a chunk after update and check text", async () => { + const response = await client.chunks.retrieve({ + id: chunkId, + }); + + expect(response.results.text).toBe("Hello, world! How are you?"); + }); + + test("List chunks", async () => { + const response = await client.chunks.list(); + expect(response.results).toBeDefined(); + }); + + test("Delete a chunk", async () => { + const response = await client.chunks.delete({ + id: chunkId, + }); + expect(response.results.success).toBe(true); + }); + + // test("Delete a chunk that does not exist", async () => { + // await expect(client.chunks.delete({ id: chunkId })).rejects.toThrow( + // /Status 404/, + // ); + // }); +}); diff --git a/js/sdk/__tests__/CollectionsIntegrationSuperUser.test.ts b/js/sdk/__tests__/CollectionsIntegrationSuperUser.test.ts new file mode 100644 index 000000000..37dbfd513 --- /dev/null +++ b/js/sdk/__tests__/CollectionsIntegrationSuperUser.test.ts @@ -0,0 +1,117 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +/** + * zametov.txt will have an id of 69100f1e-2839-5b37-916d-5c87afe14094 + */ +describe("r2rClient V3 Collections Integration Tests", () => { + let client: r2rClient; + let collectionId: string; + let documentId: string; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + await client.users.login({ + email: "admin@example.com", + password: "change_me_immediately", + }); + }); + + test("Create new collection", async () => { + const response = await client.collections.create({ + name: "Test Collection", + }); + expect(response).toBeTruthy(); + collectionId = response.results.id; + }); + + test("List collections", async () => { + const response = await client.collections.list(); + expect(response.results).toBeDefined(); + }); + + test("Retrieve collection", async () => { + const response = await client.collections.retrieve({ id: collectionId }); + expect(response.results).toBeDefined(); + }); + + test("Update collection", async () => { + const response = await client.collections.update({ + id: collectionId, + name: "Updated Test Collection", + }); + expect(response.results).toBeDefined(); + }); + + test("Ingest document and assign to collection", async () => { + const ingestResponse = await client.documents.create({ + file: { path: "examples/data/zametov.txt", name: "zametov.txt" }, + metadata: { title: "zametov.txt" }, + }); + + expect(ingestResponse.results.document_id).toBeDefined(); + documentId = ingestResponse.results.document_id; + + const response = await client.collections.addDocument({ + id: collectionId, + documentId: documentId, + }); + + expect(response.results).toBeDefined(); + }, 10000); + + test("List documents in collection", async () => { + const response = await client.collections.listDocuments({ + id: collectionId, + }); + expect(response.results).toBeDefined(); + }); + + // TODO: Need to implement user methods in V3 + // test("Add user to collection", async () => { + // const response = await client.collections.addUser({ + // id: collectionId, + // userId: "", + // }); + // expect(response.results).toBeDefined + // }); + + test("List users in collection", async () => { + const response = await client.collections.listUsers({ id: collectionId }); + expect(response.results).toBeDefined(); + }); + + // TODO: Need to implement user methods in V3 + // test("Remove 
user from collection", async () => { + // const response = await client.collections.removeUser({ + // id: collectionId, + // userId: "", + // }); + // expect(response.results).toBeDefined(); + // }); + + test("Remove document from collection", async () => { + const response = await client.collections.removeDocument({ + id: collectionId, + documentId: documentId, + }); + + expect(response.results).toBeDefined(); + }); + + test("Delete zametov.txt", async () => { + const response = await client.documents.delete({ + id: "69100f1e-2839-5b37-916d-5c87afe14094", + }); + + expect(response.results).toBeDefined(); + }); + + test("Delete collection", async () => { + await expect( + client.collections.delete({ id: collectionId }), + ).resolves.toBeTruthy(); + }); +}); diff --git a/js/sdk/__tests__/ConversationsIntegrationSuperUser.test.ts b/js/sdk/__tests__/ConversationsIntegrationSuperUser.test.ts new file mode 100644 index 000000000..6db3f1f56 --- /dev/null +++ b/js/sdk/__tests__/ConversationsIntegrationSuperUser.test.ts @@ -0,0 +1,61 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +describe("r2rClient V3 Collections Integration Tests", () => { + let client: r2rClient; + let conversationId: string; + let messageId: string; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + await client.users.login({ + email: "admin@example.com", + password: "change_me_immediately", + }); + }); + + test("List all conversations", async () => { + const response = await client.conversations.list(); + expect(response.results).toBeDefined(); + }); + + test("Create a conversation", async () => { + const response = await client.conversations.create(); + conversationId = response.results.id; + expect(response.results).toBeDefined(); + }); + + test("Add a message to a conversation", async () => { + const response = await client.conversations.addMessage({ + id: conversationId, + content: "Hello, world!", + role: "user", + }); + messageId = response.results.id; + expect(response.results).toBeDefined(); + }); + + // TODO: This is throwing a 405? Why? + // test("Update a message in a conversation", async () => { + // const response = await client.conversations.updateMessage({ + // id: conversationId, + // message_id: messageId, + // content: "Hello, world! 
How are you?", + // }); + // expect(response.results).toBeDefined(); + // }); + + test("List branches in a conversation", async () => { + const response = await client.conversations.listBranches({ + id: conversationId, + }); + expect(response.results).toBeDefined(); + }); + + test("Delete a conversation", async () => { + const response = await client.conversations.delete({ id: conversationId }); + expect(response.results).toBeDefined(); + }); +}); diff --git a/js/sdk/__tests__/DocumentsAndCollectionsIntegrationUser.test.ts b/js/sdk/__tests__/DocumentsAndCollectionsIntegrationUser.test.ts new file mode 100644 index 000000000..55232e4ff --- /dev/null +++ b/js/sdk/__tests__/DocumentsAndCollectionsIntegrationUser.test.ts @@ -0,0 +1,192 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +/** + * User 1's document will have an id of `70b39c87-a9a6-50ae-9bd0-b9460325ad81` + * User 2's document will have an id of `43fd46da-b856-52c1-9ea7-2c4aaf84108c` + * User 1's collection will have an id of `81c948ae-d41d-5d49-becf-d605444af636` + * User 2's collection will have an id of `1f99a459-6d2e-5690-ad21-db026f019683` + */ +describe("r2rClient V3 System Integration Tests User", () => { + let client: r2rClient; + let user1Client: r2rClient; + let user2Client: r2rClient; + let user1Id: string; + let user2Id: string; + let user1DocumentId: string; + let user2DocumentId: string; + let user1CollectionId: string; + let user2CollectionId: string; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + user1Client = new r2rClient(baseUrl); + user2Client = new r2rClient(baseUrl); + + await client.users.login({ + email: "admin@example.com", + password: "change_me_immediately", + }); + }); + + test("Register user 1", async () => { + const response = await client.users.register({ + email: "user_1@example.com", + password: "change_me_immediately", + }); + + user1Id = response.results.id; + expect(response.results).toBeDefined(); + expect(response.results.is_superuser).toBe(false); + expect(response.results.name).toBe(null); + }); + + test("Login as a user 1", async () => { + const response = await user1Client.users.login({ + email: "user_1@example.com", + password: "change_me_immediately", + }); + expect(response.results).toBeDefined(); + }); + + test("Register user 2", async () => { + const response = await client.users.register({ + email: "user_2@example.com", + password: "change_me_immediately", + }); + + user2Id = response.results.id; + expect(response.results).toBeDefined(); + expect(response.results.is_superuser).toBe(false); + expect(response.results.name).toBe(null); + }); + + test("Login as a user 2", async () => { + const response = await user2Client.users.login({ + email: "user_2@example.com", + password: "change_me_immediately", + }); + expect(response.results).toBeDefined(); + }); + + test("Get the health of the system", async () => { + const response = await client.system.health(); + expect(response.results).toBeDefined(); + }); + + test("Get the health of the system as user 1", async () => { + const response = await user1Client.system.health(); + expect(response.results).toBeDefined(); + }); + + test("Get the health of the system as user 2", async () => { + const response = await user2Client.system.health(); + expect(response.results).toBeDefined(); + }); + + test("Get the collections of user 1", async () => { + const response = await user1Client.collections.list(); + + 
expect(response.results).toBeDefined(); + expect(response.results.length).toBe(1); + expect(response.total_entries).toBe(1); + user1CollectionId = response.results[0].id; + }); + + test("Get the collections of user 2", async () => { + const response = await user2Client.collections.list(); + + expect(response.results).toBeDefined(); + expect(response.results.length).toBe(1); + expect(response.total_entries).toBe(1); + user2CollectionId = response.results[0].id; + }); + + test("Create document as user 1 with file path", async () => { + const response = await user1Client.documents.create({ + file: { path: "examples/data/marmeladov.txt", name: "marmeladov.txt" }, + metadata: { title: "marmeladov.txt" }, + }); + + await new Promise((resolve) => setTimeout(resolve, 5000)); + + expect(response.results.document_id).toBeDefined(); + user1DocumentId = response.results.document_id; + }, 10000); + + test("Create document as user 2 with file path", async () => { + const response = await user2Client.documents.create({ + file: { path: "examples/data/marmeladov.txt", name: "marmeladov.txt" }, + metadata: { title: "marmeladov.txt" }, + }); + + await new Promise((resolve) => setTimeout(resolve, 5000)); + + expect(response.results.document_id).toBeDefined(); + user2DocumentId = response.results.document_id; + }, 10000); + + test("Retrieve document as user 1", async () => { + const response = await user1Client.documents.retrieve({ + id: user1DocumentId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(user1DocumentId); + }); + + test("Retrieve document as user 2", async () => { + const response = await user2Client.documents.retrieve({ + id: user2DocumentId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(user2DocumentId); + }); + + test("List documents with no parameters as user 1", async () => { + const response = await user1Client.documents.list(); + + expect(response.results).toBeDefined(); + expect(Array.isArray(response.results)).toBe(true); + }); + + test("List documents with no parameters as user 2", async () => { + const response = await user2Client.documents.list(); + + expect(response.results).toBeDefined(); + expect(Array.isArray(response.results)).toBe(true); + }); + + test("Delete document as user 1", async () => { + const response = await user1Client.documents.delete({ + id: user1DocumentId, + }); + expect(response.results).toBeDefined(); + }); + + test("Delete document as user 2", async () => { + const response = await user2Client.documents.delete({ + id: user2DocumentId, + }); + expect(response.results).toBeDefined(); + }); + + test("Delete user 1", async () => { + const response = await client.users.delete({ + id: user1Id, + password: "change_me_immediately", + }); + expect(response.results).toBeDefined(); + }); + + test("Delete user 2", async () => { + const response = await client.users.delete({ + id: user2Id, + password: "change_me_immediately", + }); + expect(response.results).toBeDefined(); + }); +}); diff --git a/js/sdk/__tests__/DocumentsIntegrationSuperUser.test.ts b/js/sdk/__tests__/DocumentsIntegrationSuperUser.test.ts new file mode 100644 index 000000000..40ecf747d --- /dev/null +++ b/js/sdk/__tests__/DocumentsIntegrationSuperUser.test.ts @@ -0,0 +1,104 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +/** + * marmeladov.txt will have an id of 83ef5342-4275-5b75-92d6-692fa32f8523 + * The untitled 
document will have an id of 5556836e-a51c-57c7-916a-de76c79df2b6 + */ +describe("r2rClient V3 Documents Integration Tests", () => { + let client: r2rClient; + let documentId: string; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + await client.users.login({ + email: "admin@example.com", + password: "change_me_immediately", + }); + }); + + test("Create document with file path", async () => { + const response = await client.documents.create({ + file: { path: "examples/data/marmeladov.txt", name: "marmeladov.txt" }, + metadata: { title: "marmeladov.txt" }, + }); + + expect(response.results.document_id).toBeDefined(); + documentId = response.results.document_id; + }, 10000); + + test("Create document with content", async () => { + const response = await client.documents.create({ + raw_text: "This is a test document", + metadata: { title: "Test Document" }, + }); + + expect(response.results.document_id).toBeDefined(); + }, 30000); + + test("Retrieve document", async () => { + const response = await client.documents.retrieve({ + id: documentId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(documentId); + }); + + test("List documents with no parameters", async () => { + const response = await client.documents.list(); + + expect(response.results).toBeDefined(); + expect(Array.isArray(response.results)).toBe(true); + }); + + test("List documents with parameters", async () => { + const response = await client.documents.list({ + offset: 0, + limit: 5, + }); + + expect(response.results).toBeDefined(); + expect(Array.isArray(response.results)).toBe(true); + expect(response.results.length).toBeLessThanOrEqual(5); + }); + + test("Error handling - Create document with no file or content", async () => { + await expect( + client.documents.create({ + metadata: { title: "No Content" }, + }), + ).rejects.toThrow(/Either file, raw_text, or chunks must be provided/); + }); + + test("Error handling - Create document with both file and content", async () => { + await expect( + client.documents.create({ + file: { + path: "examples/data/raskolnikov.txt", + name: "raskolnikov.txt", + }, + raw_text: "Test content", + metadata: { title: "Both File and Content" }, + }), + ).rejects.toThrow(/Only one of file, raw_text, or chunks may be provided/); + }); + + test("Delete Raskolnikov.txt", async () => { + const response = await client.documents.delete({ + id: "83ef5342-4275-5b75-92d6-692fa32f8523", + }); + + expect(response.results).toBeDefined(); + }); + + test("Delete untitled document", async () => { + const response = await client.documents.delete({ + id: "5556836e-a51c-57c7-916a-de76c79df2b6", + }); + + expect(response.results).toBeDefined(); + }); +}); diff --git a/js/sdk/__tests__/GraphsIntegrationSuperUser.test.ts b/js/sdk/__tests__/GraphsIntegrationSuperUser.test.ts new file mode 100644 index 000000000..68a510337 --- /dev/null +++ b/js/sdk/__tests__/GraphsIntegrationSuperUser.test.ts @@ -0,0 +1,491 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +describe("r2rClient V3 Graphs Integration Tests", () => { + let client: r2rClient; + let documentId: string; + let collectionId: string; + let entity1Id: string; + let entity2Id: string; + let relationshipId: string; + let communityId: string; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + await client.users.login({ + email: "admin@example.com", + password: "change_me_immediately", + }); + 
}); + + test("Create document with file path", async () => { + const response = await client.documents.create({ + file: { + path: "examples/data/raskolnikov_2.txt", + name: "raskolnikov_2.txt", + }, + metadata: { title: "raskolnikov_2.txt" }, + }); + + expect(response.results.document_id).toBeDefined(); + documentId = response.results.document_id; + }, 10000); + + test("Create new collection", async () => { + const response = await client.collections.create({ + name: "Raskolnikov Collection", + }); + expect(response).toBeTruthy(); + collectionId = response.results.id; + }); + + test("Retrieve collection", async () => { + const response = await client.collections.retrieve({ + id: collectionId, + }); + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(collectionId); + expect(response.results.name).toBe("Raskolnikov Collection"); + }); + + test("Update graph", async () => { + const response = await client.graphs.update({ + collectionId: collectionId, + name: "Raskolnikov Graph", + }); + + expect(response.results).toBeDefined(); + }); + + test("Retrieve graph and ensure that update was successful", async () => { + const response = await client.graphs.retrieve({ + collectionId: collectionId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.name).toBe("Raskolnikov Graph"); + expect(response.results.updated_at).not.toBe(response.results.created_at); + }); + + test("List graphs", async () => { + const response = await client.graphs.list({}); + + expect(response.results).toBeDefined(); + }); + + test("Check that there are no entities in the graph", async () => { + const response = await client.graphs.listEntities({ + collectionId: collectionId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.entries).toHaveLength(0); + }); + + test("Check that there are no relationships in the graph", async () => { + const response = await client.graphs.listRelationships({ + collectionId: collectionId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.entries).toHaveLength; + }); + + test("Extract entities from the document", async () => { + const response = await client.documents.extract({ + id: documentId, + }); + + await new Promise((resolve) => setTimeout(resolve, 30000)); + + expect(response.results).toBeDefined(); + }, 60000); + + test("Assign document to collection", async () => { + const response = await client.collections.addDocument({ + id: collectionId, + documentId: documentId, + }); + expect(response.results).toBeDefined(); + }); + + test("Pull entities into the graph", async () => { + const response = await client.graphs.pull({ + collectionId: collectionId, + }); + expect(response.results).toBeDefined(); + }); + + test("Check that there are entities in the graph", async () => { + const response = await client.graphs.listEntities({ + collectionId: collectionId, + }); + expect(response.results).toBeDefined(); + expect(response.total_entries).toBeGreaterThanOrEqual(1); + }, 60000); + + test("Check that there are relationships in the graph", async () => { + const response = await client.graphs.listRelationships({ + collectionId: collectionId, + }); + expect(response.results).toBeDefined(); + expect(response.total_entries).toBeGreaterThanOrEqual(1); + }); + + test("Check that there are no communities in the graph prior to building", async () => { + const response = await client.graphs.listCommunities({ + collectionId: collectionId, + }); + + expect(response.results).toBeDefined(); + 
expect(response.results.entries).toHaveLength(0); + }); + + test("Build communities", async () => { + const response = await client.graphs.buildCommunities({ + collectionId: collectionId, + }); + + await new Promise((resolve) => setTimeout(resolve, 15000)); + + expect(response.results).toBeDefined(); + }, 45000); + + test("Check that there are communities in the graph", async () => { + const response = await client.graphs.listCommunities({ + collectionId: collectionId, + }); + + expect(response.results).toBeDefined(); + expect(response.total_entries).toBeGreaterThanOrEqual(1); + }); + + test("Create a new entity", async () => { + const response = await client.graphs.createEntity({ + collectionId: collectionId, + name: "Razumikhin", + description: "A good friend of Raskolnikov", + category: "Person", + }); + + expect(response.results).toBeDefined(); + entity1Id = response.results.id; + }); + + test("Create another new entity", async () => { + const response = await client.graphs.createEntity({ + collectionId: collectionId, + name: "Dunia", + description: "The sister of Raskolnikov", + category: "Person", + }); + + expect(response.results).toBeDefined(); + entity2Id = response.results.id; + }); + + test("Retrieve the entity", async () => { + const response = await client.graphs.getEntity({ + collectionId: collectionId, + entityId: entity1Id, + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(entity1Id); + expect(response.results.name).toBe("Razumikhin"); + expect(response.results.description).toBe("A good friend of Raskolnikov"); + }); + + test("Retrieve the other entity", async () => { + const response = await client.graphs.getEntity({ + collectionId: collectionId, + entityId: entity2Id, + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(entity2Id); + expect(response.results.name).toBe("Dunia"); + expect(response.results.description).toBe("The sister of Raskolnikov"); + }); + + test("Check that the entities are in the graph", async () => { + const response = await client.graphs.listEntities({ + collectionId: collectionId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.map((entity) => entity.id)).toContain(entity1Id); + expect(response.results.map((entity) => entity.id)).toContain(entity2Id); + }); + + test("Create a relationship between the entities", async () => { + const response = await client.graphs.createRelationship({ + collectionId: collectionId, + subject: "Razumikhin", + subjectId: entity1Id, + predicate: "falls in love with", + object: "Dunia", + objectId: entity2Id, + }); + + relationshipId = response.results.id; + + expect(response.results).toBeDefined(); + expect(response.results.subject).toBe("Razumikhin"); + expect(response.results.object).toBe("Dunia"); + expect(response.results.predicate).toBe("falls in love with"); + }); + + test("Retrieve the relationship", async () => { + const response = await client.graphs.getRelationship({ + collectionId: collectionId, + relationshipId: relationshipId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(relationshipId); + expect(response.results.subject).toBe("Razumikhin"); + expect(response.results.object).toBe("Dunia"); + expect(response.results.predicate).toBe("falls in love with"); + }); + + test("Create a new community", async () => { + const response = await client.graphs.createCommunity({ + collectionId: collectionId, + name: "Raskolnikov and Dunia Community", + summary: + "Raskolnikov and Dunia are 
siblings, the children of Pulcheria Alexandrovna", + findings: [ + "Raskolnikov and Dunia are siblings", + "They are the children of Pulcheria Alexandrovna", + "Their family comes from a modest background", + "Dunia works as a governess to support the family", + "Raskolnikov is a former university student", + "Both siblings are intelligent and well-educated", + "They maintain a close relationship despite living apart", + "Their mother Pulcheria writes letters to keep them connected", + ], + rating: 10, + ratingExplanation: + "Raskolnikov and Dunia are central to the story and have a complex relationship", + }); + + communityId = response.results.id; + + expect(response.results).toBeDefined(); + expect(response.results.name).toBe("Raskolnikov and Dunia Community"); + expect(response.results.summary).toBe( + "Raskolnikov and Dunia are siblings, the children of Pulcheria Alexandrovna", + ); + expect(response.results.findings).toContain( + "Raskolnikov and Dunia are siblings", + ); + expect(response.results.findings).toContain( + "They are the children of Pulcheria Alexandrovna", + ); + expect(response.results.findings).toContain( + "Their family comes from a modest background", + ); + expect(response.results.findings).toContain( + "Dunia works as a governess to support the family", + ); + expect(response.results.findings).toContain( + "Raskolnikov is a former university student", + ); + expect(response.results.findings).toContain( + "Both siblings are intelligent and well-educated", + ); + expect(response.results.findings).toContain( + "They maintain a close relationship despite living apart", + ); + expect(response.results.findings).toContain( + "Their mother Pulcheria writes letters to keep them connected", + ); + expect(response.results.rating).toBe(10); + //TODO: Why is this failing? + // expect(response.results.ratingExplanation).toBe( + // "Raskolnikov and Dunia are central to the story and have a complex relationship", + // ); + }); + + test("Update the entity", async () => { + const response = await client.graphs.updateEntity({ + collectionId: collectionId, + entityId: entity1Id, + name: "Dmitri Prokofich Razumikhin", + description: "A good friend of Raskolnikov and Dunia", + category: "Person", + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(entity1Id); + expect(response.results.name).toBe("Dmitri Prokofich Razumikhin"); + expect(response.results.description).toBe( + "A good friend of Raskolnikov and Dunia", + ); + }); + + test("Retrieve the updated entity", async () => { + const response = await client.graphs.getEntity({ + collectionId: collectionId, + entityId: entity1Id, + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(entity1Id); + expect(response.results.name).toBe("Dmitri Prokofich Razumikhin"); + expect(response.results.description).toBe( + "A good friend of Raskolnikov and Dunia", + ); + }); + + // This test is failing because we attach a separate name to the relationship, rather + // than use the names of the entities. This needs to be fixed in the backend. 
+ // test("Ensure that the entity was updated in the relationship", async () => { + // const response = await client.graphs.getRelationship({ + // collectionId: collectionId, + // relationshipId: relationshipId, + // }); + + // expect(response.results).toBeDefined(); + // expect(response.results.subject).toBe("Dmitri Prokofich Razumikhin"); + // expect(response.results.object).toBe("Dunia"); + // expect(response.results.predicate).toBe("falls in love with"); + // }); + + test("Update the relationship", async () => { + const response = await client.graphs.updateRelationship({ + collectionId: collectionId, + relationshipId: relationshipId, + subject: "Razumikhin", + subjectId: entity1Id, + predicate: "marries", + object: "Dunia", + objectId: entity2Id, + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(relationshipId); + expect(response.results.subject).toBe("Razumikhin"); + expect(response.results.object).toBe("Dunia"); + expect(response.results.predicate).toBe("marries"); + }); + + test("Retrieve the updated relationship", async () => { + const response = await client.graphs.getRelationship({ + collectionId: collectionId, + relationshipId: relationshipId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(relationshipId); + expect(response.results.subject).toBe("Razumikhin"); + expect(response.results.object).toBe("Dunia"); + expect(response.results.predicate).toBe("marries"); + }); + + test("Update the community", async () => { + const response = await client.graphs.updateCommunity({ + collectionId: collectionId, + communityId: communityId, + name: "Rodion Romanovich Raskolnikov and Avdotya Romanovna Raskolnikova Community", + summary: + "Rodion and Avdotya are siblings, the children of Pulcheria Alexandrovna Raskolnikova", + }); + + expect(response.results).toBeDefined(); + expect(response.results.name).toBe( + "Rodion Romanovich Raskolnikov and Avdotya Romanovna Raskolnikova Community", + ); + expect(response.results.summary).toBe( + "Rodion and Avdotya are siblings, the children of Pulcheria Alexandrovna Raskolnikova", + ); + }); + + test("Retrieve the updated community", async () => { + const response = await client.graphs.getCommunity({ + collectionId: collectionId, + communityId: communityId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.id).toBe(communityId); + expect(response.results.name).toBe( + "Rodion Romanovich Raskolnikov and Avdotya Romanovna Raskolnikova Community", + ); + expect(response.results.summary).toBe( + "Rodion and Avdotya are siblings, the children of Pulcheria Alexandrovna Raskolnikova", + ); + }); + + test("Delete the community", async () => { + const response = await client.graphs.deleteCommunity({ + collectionId: collectionId, + communityId: communityId, + }); + + expect(response.results).toBeDefined(); + }); + + test("Check that the community was deleted", async () => { + const response = await client.graphs.listCommunities({ + collectionId: collectionId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.entries).toHaveLength(0); + }); + + test("Reset the graph", async () => { + const response = await client.graphs.reset({ + collectionId: collectionId, + }); + + expect(response.results).toBeDefined(); + }); + + test("Check that there are no entities in the graph", async () => { + const response = await client.graphs.listEntities({ + collectionId: collectionId, + }); + + expect(response.results).toBeDefined(); + 
expect(response.results.entries).toHaveLength(0); + }); + + test("Check that there are no relationships in the graph", async () => { + const response = await client.graphs.listRelationships({ + collectionId: collectionId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.entries).toHaveLength(0); + }); + + test("Delete raskolnikov_2.txt", async () => { + const response = await client.documents.delete({ + id: documentId, + }); + + expect(response.results).toBeDefined(); + }); + + test("Check that the document is not in the collection", async () => { + const response = await client.collections.listDocuments({ + id: collectionId, + }); + + expect(response.results).toBeDefined(); + expect(response.results.entries).toHaveLength(0); + }); + + test("Delete Raskolnikov Collection", async () => { + const response = await client.collections.delete({ + id: collectionId, + }); + + expect(response.results).toBeDefined(); + }); +}); diff --git a/js/sdk/__tests__/PromptsIntegrationSuperUser.test.ts b/js/sdk/__tests__/PromptsIntegrationSuperUser.test.ts new file mode 100644 index 000000000..15b0d2e52 --- /dev/null +++ b/js/sdk/__tests__/PromptsIntegrationSuperUser.test.ts @@ -0,0 +1,53 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +describe("r2rClient V3 Collections Integration Tests", () => { + let client: r2rClient; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + await client.users.login({ + email: "admin@example.com", + password: "change_me_immediately", + }); + }); + + test("List prompts", async () => { + const response = await client.prompts.list(); + expect(response.results).toBeDefined(); + }); + + test("Create a prompt", async () => { + const response = await client.prompts.create({ + name: "test-prompt", + template: "Hello, {name}!", + inputTypes: { name: "string" }, + }); + expect(response.results).toBeDefined(); + }); + + test("Retrieve a prompt", async () => { + const response = await client.prompts.retrieve({ + name: "test-prompt", + }); + expect(response.results).toBeDefined(); + }); + + test("Update a prompt", async () => { + const response = await client.prompts.update({ + name: "test-prompt", + template: "Hello, {name}! 
How are you?", + inputTypes: { name: "string" }, + }); + expect(response.results).toBeDefined(); + }); + + test("Delete a prompt", async () => { + const response = await client.prompts.delete({ + name: "test-prompt", + }); + expect(response.results).toBeDefined(); + }); +}); diff --git a/js/sdk/__tests__/RetrievalIntegrationSuperUser.test.ts b/js/sdk/__tests__/RetrievalIntegrationSuperUser.test.ts new file mode 100644 index 000000000..89451941e --- /dev/null +++ b/js/sdk/__tests__/RetrievalIntegrationSuperUser.test.ts @@ -0,0 +1,133 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +const message = { + role: "user" as const, + content: "Tell me about Sonia.", +}; + +/** + * sonia.txt will have an id of 28ce9a4c-4d15-5287-b0c6-67834b9c4546 + */ +describe("r2rClient V3 Documents Integration Tests", () => { + let client: r2rClient; + let documentId: string; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + await client.users.login({ + email: "admin@example.com", + password: "change_me_immediately", + }); + }); + + async function readStream( + stream: ReadableStream, + ): Promise { + const reader = stream.getReader(); + let result = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + result += new TextDecoder().decode(value); + } + + return result; + } + + test("Create document with file path", async () => { + const response = await client.documents.create({ + file: { path: "examples/data/sonia.txt", name: "sonia.txt" }, + metadata: { title: "sonia.txt" }, + }); + + expect(response.results.document_id).toBeDefined(); + documentId = response.results.document_id; + }, 10000); + + test("Search documents with no parameters", async () => { + const response = await client.retrieval.search({ query: "Sonia" }); + + expect(response.results).toBeDefined(); + }); + + test("RAG with no parameters", async () => { + const response = await client.retrieval.rag({ query: "Sonia" }); + + expect(response.results).toBeDefined(); + }, 30000); + + test("Streaming RAG", async () => { + const stream = await client.retrieval.rag({ + query: "Sonia", + ragGenerationConfig: { + stream: true, + }, + }); + + expect(stream).toBeInstanceOf(ReadableStream); + const content = await readStream(stream); + expect(content).toBeTruthy(); + expect(typeof content).toBe("string"); + expect(content.length).toBeGreaterThan(0); + }, 30000); + + test("Agent with no parameters", async () => { + const response = await client.retrieval.agent({ + message: message, + }); + + expect(response.results).toBeDefined(); + }, 30000); + + test("Streaming agent", async () => { + const stream = await client.retrieval.agent({ + message: message, + ragGenerationConfig: { + stream: true, + }, + }); + + expect(stream).toBeInstanceOf(ReadableStream); + const content = await readStream(stream); + expect(content).toBeTruthy(); + expect(typeof content).toBe("string"); + expect(content.length).toBeGreaterThan(0); + }, 30000); + + // test("Completion with no parameters", async () => { + // const response = await client.retrieval.completion({ + // messages: messages, + // }); + + // expect(response.results).toBeDefined(); + // }, 30000); + + // test("Streaming Completion", async () => { + // const stream = await client.retrieval.completion({ + // messages: messages, + // generation_config: { + // stream: true, + // }, + // }); + + // expect(stream).toBeInstanceOf(ReadableStream); + // const 
content = await readStream(stream); + // expect(content).toBeTruthy(); + // expect(typeof content).toBe("string"); + // expect(content.length).toBeGreaterThan(0); + // }, 30000); + + test("Delete untitled document", async () => { + const response = await client.documents.delete({ + id: "28ce9a4c-4d15-5287-b0c6-67834b9c4546", + }); + + expect(response.results).toBeDefined(); + }); +}); diff --git a/js/sdk/__tests__/SystemIntegrationSuperUser.test.ts b/js/sdk/__tests__/SystemIntegrationSuperUser.test.ts new file mode 100644 index 000000000..076ecbc11 --- /dev/null +++ b/js/sdk/__tests__/SystemIntegrationSuperUser.test.ts @@ -0,0 +1,36 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +describe("r2rClient V3 Collections Integration Tests", () => { + let client: r2rClient; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + await client.users.login({ + email: "admin@example.com", + password: "change_me_immediately", + }); + }); + + test("Get the health of the system", async () => { + const response = await client.system.health(); + expect(response.results).toBeDefined(); + }); + + test("Get system logs", async () => { + const response = await client.system.logs({}); + expect(response.results).toBeDefined(); + }); + + test("Get the settings of the system", async () => { + const response = await client.system.settings(); + expect(response.results).toBeDefined(); + }); + + test("Get the status of the system", async () => { + const response = await client.system.status(); + expect(response.results).toBeDefined(); + }); +}); diff --git a/js/sdk/__tests__/SystemIntegrationUser.test.ts b/js/sdk/__tests__/SystemIntegrationUser.test.ts new file mode 100644 index 000000000..922a6e440 --- /dev/null +++ b/js/sdk/__tests__/SystemIntegrationUser.test.ts @@ -0,0 +1,60 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +describe("r2rClient V3 System Integration Tests User", () => { + let client: r2rClient; + let userId: string; + let name: string | undefined; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + }); + + test("Register a new user", async () => { + const response = await client.users.register({ + email: "system_integration_test_user@example.com", + password: "change_me_immediately", + }); + + userId = response.results.id; + name = response.results.name; + expect(response.results).toBeDefined(); + expect(response.results.is_superuser).toBe(false); + expect(response.results.name).toBe(null); + }); + + test("Login as a user", async () => { + const response = await client.users.login({ + email: "system_integration_test_user@example.com", + password: "change_me_immediately", + }); + expect(response.results).toBeDefined(); + }); + + test("Get the health of the system", async () => { + const response = await client.system.health(); + expect(response.results).toBeDefined(); + }); + + test("Only a superuser can call the `system/logs` endpoint.", async () => { + await expect(client.system.logs({})).rejects.toThrow(/Status 403/); + }); + + test("Only a superuser can call the `system/settings` endpoint.", async () => { + await expect(client.system.settings()).rejects.toThrow(/Status 403/); + }); + + test("Only an authorized user can call the `system/status` endpoint.", async () => { + await expect(client.system.status()).rejects.toThrow(/Status 403/); + }); + + 
test("Delete a user", async () => { + const response = await client.users.delete({ + id: userId, + password: "change_me_immediately", + }); + expect(response.results).toBeDefined(); + }); +}); diff --git a/js/sdk/__tests__/UsersIntegrationSuperUser.test.ts b/js/sdk/__tests__/UsersIntegrationSuperUser.test.ts new file mode 100644 index 000000000..9d70acaf4 --- /dev/null +++ b/js/sdk/__tests__/UsersIntegrationSuperUser.test.ts @@ -0,0 +1,102 @@ +import { r2rClient } from "../src/index"; +import { describe, test, beforeAll, expect } from "@jest/globals"; + +const baseUrl = "http://localhost:7272"; + +describe("r2rClient V3 Users Integration Tests", () => { + let client: r2rClient; + let userId: string; + let name: string | undefined; + + beforeAll(async () => { + client = new r2rClient(baseUrl); + }); + + test("Register a new user", async () => { + const response = await client.users.register({ + email: "new_user@example.com", + password: "change_me_immediately", + }); + + userId = response.results.id; + name = response.results.name; + expect(response.results).toBeDefined(); + expect(response.results.is_superuser).toBe(false); + expect(response.results.name).toBe(null); + }); + + test("Login as a user", async () => { + const response = await client.users.login({ + email: "new_user@example.com", + password: "change_me_immediately", + }); + expect(response.results).toBeDefined(); + }); + + test("Logout as a user", async () => { + await client.users.logout(); + }); + + test("Login as a user after logout", async () => { + const response = await client.users.login({ + email: "new_user@example.com", + password: "change_me_immediately", + }); + expect(response.results).toBeDefined(); + }); + + test("Change a user's password", async () => { + const response = await client.users.changePassword({ + current_password: "change_me_immediately", + new_password: "i_was_changed_immediately", + }); + expect(response.results).toBeDefined(); + }); + + test("Logout and login with new password", async () => { + await client.users.logout(); + + const login_response = await client.users.login({ + email: "new_user@example.com", + password: "i_was_changed_immediately", + }); + expect(login_response.results).toBeDefined(); + }); + + test("Retrieve the current user", async () => { + const response = await client.users.me(); + expect(response.results).toBeDefined(); + }); + + test("Retrieve a user", async () => { + const response = await client.users.retrieve({ id: userId }); + expect(response.results).toBeDefined(); + }); + + test("Update a user", async () => { + const response = await client.users.update({ + id: userId, + name: "New Name", + }); + expect(response.results).toBeDefined(); + }); + + test("Retrieve a user after update", async () => { + const response = await client.users.retrieve({ id: userId }); + expect(response.results).toBeDefined(); + }); + + test("List user's collections", async () => { + const response = await client.users.listCollections({ id: userId }); + expect(response.results).toBeDefined(); + expect(Array.isArray(response.results)).toBe(true); + }); + + test("Delete a user", async () => { + const response = await client.users.delete({ + id: userId, + password: "i_was_changed_immediately", + }); + expect(response.results).toBeDefined(); + }); +}); diff --git a/js/sdk/__tests__/r2rClient.test.ts b/js/sdk/__tests__/r2rClient.test.ts deleted file mode 100644 index 478774266..000000000 --- a/js/sdk/__tests__/r2rClient.test.ts +++ /dev/null @@ -1,575 +0,0 @@ -import { r2rClient } from 
"../src/r2rClient"; -import axios from "axios"; -import { describe, test, beforeEach, expect, jest } from "@jest/globals"; - -jest.mock("axios"); - -describe("R2RClient", () => { - let client: r2rClient; - let mockAxiosInstance: any; - - beforeEach(() => { - mockAxiosInstance = { - post: jest.fn(), - request: jest.fn(), - defaults: { baseURL: "http://0.0.0.0:7272/v2" }, - }; - - (axios.create as jest.Mock).mockReturnValue(mockAxiosInstance); - - client = new r2rClient("http://0.0.0.0:7272"); - }); - - describe("Mocked Tests", () => { - test("should correctly set the baseURL with prefix", () => { - expect((client as any).axiosInstance.defaults.baseURL).toBe( - "http://0.0.0.0:7272/v2", - ); - }); - - test("health should return data from the /health endpoint", async () => { - const mockResponse = { response: "ok" }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - const result = await client.health(); - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "GET", - url: "health", - headers: {}, - responseType: "json", - }); - }); - }); - - describe("Authentication Methods", () => { - test("register should send POST request to /register with correct data", async () => { - const mockResponse = { success: true }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - const email = "test@example.com"; - const password = "password123"; - const result = await client.register(email, password); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "register", - data: JSON.stringify({ email, password }), - headers: { - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - - test("login should send POST request to /login with correct data and set tokens", async () => { - const mockResponse = { - results: { - access_token: { token: "access-token", token_type: "access_token" }, - refresh_token: { - token: "refresh-token", - token_type: "refresh_token", - }, - }, - }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - const email = "test@example.com"; - const password = "password123"; - const result = await client.login(email, password); - - expect(result).toEqual(mockResponse.results); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "login", - data: "username=test%40example.com&password=password123", - headers: { - "Content-Type": "application/x-www-form-urlencoded", - }, - responseType: "json", - }); - // Check that tokens are set - expect((client as any).accessToken).toBe("access-token"); - expect((client as any).refreshToken).toBe("refresh-token"); - }); - - test("verifyEmail should send POST request to /verify_email with correct data", async () => { - const mockResponse = { success: true }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - const email = "test@example.com"; - const verification_code = "123456"; - const result = await client.verifyEmail(email, verification_code); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "verify_email", - data: JSON.stringify({ email, verification_code }), - headers: { - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - - test("requestPasswordReset should send POST request to /request_password_reset with correct data", async () => { - const mockResponse = { success: true }; - 
mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - const email = "test@example.com"; - const result = await client.requestPasswordReset(email); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "request_password_reset", - data: '"test@example.com"', - headers: { - "Content-Type": "application/json", - }, - responseType: "json", - params: undefined, - }); - }); - - test("logout should send POST request to /logout and clear tokens", async () => { - mockAxiosInstance.request.mockResolvedValue({ data: {} }); - - // Set tokens first - (client as any).accessToken = "access-token"; - (client as any).refreshToken = "refresh-token"; - - const result = await client.logout(); - - expect(result).toEqual({}); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "logout", - headers: { - Authorization: "Bearer access-token", - }, - responseType: "json", - }); - expect((client as any).accessToken).toBeNull(); - expect((client as any).refreshToken).toBeNull(); - }); - - test("user should send GET request to /user and return data", async () => { - const mockResponse = { id: "user-id", email: "test@example.com" }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set accessToken - (client as any).accessToken = "access-token"; - - const result = await client.user(); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "GET", - url: "user", - headers: { - Authorization: "Bearer access-token", - }, - responseType: "json", - }); - }); - - test("updateUser should send PUT request to /user with correct data", async () => { - const mockResponse = { success: true }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set accessToken - (client as any).accessToken = "access-token"; - - const userId = "user-id"; - const email = "new@example.com"; - const name = "New Name"; - const bio = "New Bio"; - const profilePicture = "http://example.com/pic.jpg"; - const isSuperuser = true; - - const result = await client.updateUser( - userId, - email, - isSuperuser, - name, - bio, - profilePicture, - ); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "PUT", - url: "user", - data: JSON.stringify({ - user_id: userId, - email, - is_superuser: isSuperuser, - name, - bio, - profile_picture: profilePicture, - }), - headers: { - Authorization: "Bearer access-token", - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - - test("refreshAccessToken should send POST request to /refresh_access_token and update tokens", async () => { - const mockResponse = { - results: { - access_token: { - token: "new-access-token", - token_type: "access_token", - }, - refresh_token: { - token: "new-refresh-token", - token_type: "refresh_token", - }, - }, - }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set refreshToken - (client as any).refreshToken = "old-refresh-token"; - - const result = await client.refreshAccessToken(); - - expect(result).toEqual(mockResponse); - expect((client as any).accessToken).toBe("new-access-token"); - expect((client as any).refreshToken).toBe("new-refresh-token"); - - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "refresh_access_token", - data: "old-refresh-token", - headers: { - "Content-Type": "application/x-www-form-urlencoded", - 
}, - responseType: "json", - }); - }); - - test("changePassword should send POST request to /change_password with correct data", async () => { - const mockResponse = { success: true }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set accessToken - (client as any).accessToken = "access-token"; - - const current_password = "old-password"; - const new_password = "new-password"; - - const result = await client.changePassword( - current_password, - new_password, - ); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "change_password", - data: JSON.stringify({ - current_password, - new_password, - }), - headers: { - Authorization: "Bearer access-token", - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - - test("confirmPasswordReset should send POST request to /reset_password/{resetToken} with correct data", async () => { - const mockResponse = { success: true }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - const resetToken = "reset-token"; - const newPassword = "new-password"; - - const result = await client.confirmPasswordReset(resetToken, newPassword); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: `reset_password/${resetToken}`, - data: JSON.stringify({ new_password: newPassword }), - headers: { - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - - test("deleteUser should send DELETE request to /user/{userId} with correct data", async () => { - const mockResponse = { success: true }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set accessToken - (client as any).accessToken = "access-token"; - - const userId = "user-id"; - const password = "password123"; - - const result = await client.deleteUser(userId, password); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "DELETE", - url: `user/${userId}`, - data: JSON.stringify({ password }), - headers: { - Authorization: "Bearer access-token", - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - }); - - describe("Ingestion Methods", () => { - test("ingestChunks should send POST request to /ingest_chunks with correct data", async () => { - const mockResponse = { success: true }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set accessToken - (client as any).accessToken = "access-token"; - - const chunks = [ - { text: "Chunk 1", metadata: {} }, - { text: "Chunk 2", metadata: {} }, - ]; - const documentId = "doc-id"; - const metadata = { key: "value" }; - const run_with_orchestration = true; - - const result = await client.ingestChunks( - chunks, - documentId, - metadata, - run_with_orchestration, - undefined, - ); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "ingest_chunks", - data: JSON.stringify({ - chunks, - document_id: documentId, - metadata, - run_with_orchestration, - }), - headers: { - Authorization: "Bearer access-token", - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - - test("updateChunk should send PUT request to /update_chunk/{documentId}/{extractionId} with correct data", async () => { - const mockResponse = { success: true }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // 
Set accessToken - (client as any).accessToken = "access-token"; - - const documentId = "doc-id"; - const extractionId = "chunk-id"; - const text = "Updated text"; - const metadata = { key: "new value" }; - const runWithOrchestration = false; - - const result = await client.updateChunk( - documentId, - extractionId, - text, - metadata, - runWithOrchestration, - ); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "PUT", - url: `update_chunk/${documentId}/${extractionId}`, - data: JSON.stringify({ - text, - metadata, - run_with_orchestration: runWithOrchestration, - }), - headers: { - Authorization: "Bearer access-token", - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - }); - - describe("Management Methods", () => { - test("serverStats should send GET request to /server_stats and return data", async () => { - const mockResponse = { uptime: 12345 }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set accessToken - (client as any).accessToken = "access-token"; - - const result = await client.serverStats(); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "GET", - url: "server_stats", - headers: { - Authorization: "Bearer access-token", - }, - responseType: "json", - }); - }); - - test("updatePrompt should send POST request to /update_prompt with correct data", async () => { - const mockResponse = { success: true }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set accessToken - (client as any).accessToken = "access-token"; - - const name = "default_system"; - const template = "New template"; - const input_types = { key: "value" }; - - const result = await client.updatePrompt(name, template, input_types); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "update_prompt", - data: JSON.stringify({ - name, - template, - input_types, - }), - headers: { - Authorization: "Bearer access-token", - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - - test("analytics should send GET request to /analytics with correct params", async () => { - const mockResponse = { data: [] }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set accessToken - (client as any).accessToken = "access-token"; - - const filter_criteria = { date: "2021-01-01" }; - const analysis_types = ["type1", "type2"]; - - const result = await client.analytics(filter_criteria, analysis_types); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith( - expect.objectContaining({ - method: "GET", - url: "analytics", - params: { - filter_criteria: JSON.stringify(filter_criteria), - analysis_types: JSON.stringify(analysis_types), - }, - headers: { - Authorization: "Bearer access-token", - }, - responseType: "json", - }), - ); - }); - }); - - describe("Retrieval Methods", () => { - test("search should send POST request to /search with correct data", async () => { - const mockResponse = { results: [] }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set accessToken - (client as any).accessToken = "access-token"; - - const query = "test query"; - const vector_search_settings = { top_k: 5 }; - const kg_search_settings = { max_hops: 2 }; - - const result = await client.search( - query, - vector_search_settings, - kg_search_settings, - 
); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "search", - data: JSON.stringify({ - query, - vector_search_settings, - kg_search_settings, - }), - headers: { - Authorization: "Bearer access-token", - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - - test("rag should send POST request to /rag with correct data", async () => { - const mockResponse = { answer: "Test answer" }; - mockAxiosInstance.request.mockResolvedValue({ data: mockResponse }); - - // Set accessToken - (client as any).accessToken = "access-token"; - - const query = "test query"; - const rag_generation_config = { max_tokens: 100 }; - const vector_search_settings = { top_k: 5 }; - const kg_search_settings = { max_hops: 2 }; - const task_prompt_override = "Custom prompt"; - const include_title_if_available = true; - - const result = await client.rag( - query, - vector_search_settings, - kg_search_settings, - rag_generation_config, - task_prompt_override, - include_title_if_available, - ); - - expect(result).toEqual(mockResponse); - expect(mockAxiosInstance.request).toHaveBeenCalledWith({ - method: "POST", - url: "rag", - data: JSON.stringify({ - query, - vector_search_settings, - kg_search_settings, - rag_generation_config, - task_prompt_override, - include_title_if_available, - }), - headers: { - Authorization: "Bearer access-token", - "Content-Type": "application/json", - }, - responseType: "json", - }); - }); - }); -}); diff --git a/js/sdk/examples/data/marmeladov.txt b/js/sdk/examples/data/marmeladov.txt new file mode 100644 index 000000000..b626593d6 --- /dev/null +++ b/js/sdk/examples/data/marmeladov.txt @@ -0,0 +1,22 @@ +His conversation seemed to excite a general though languid interest. The +boys at the counter fell to sniggering. The innkeeper came down from the +upper room, apparently on purpose to listen to the “funny fellow” + and sat down at a little distance, yawning lazily, but with dignity. +Evidently Marmeladov was a familiar figure here, and he had most +likely acquired his weakness for high-flown speeches from the habit of +frequently entering into conversation with strangers of all sorts in +the tavern. This habit develops into a necessity in some drunkards, and +especially in those who are looked after sharply and kept in order +at home. Hence in the company of other drinkers they try to justify +themselves and even if possible obtain consideration. + +“Funny fellow!” pronounced the innkeeper. “And why don’t you work, why +aren’t you at your duty, if you are in the service?” + +“Why am I not at my duty, honoured sir,” Marmeladov went on, addressing +himself exclusively to Raskolnikov, as though it had been he who put +that question to him. “Why am I not at my duty? Does not my heart ache +to think what a useless worm I am? A month ago when Mr. Lebeziatnikov +beat my wife with his own hands, and I lay drunk, didn’t I suffer? +Excuse me, young man, has it ever happened to you... hm... well, to +petition hopelessly for a loan?” diff --git a/js/sdk/examples/data/raskolnikov_2.txt b/js/sdk/examples/data/raskolnikov_2.txt new file mode 100644 index 000000000..895e99965 --- /dev/null +++ b/js/sdk/examples/data/raskolnikov_2.txt @@ -0,0 +1,7 @@ +When Raskolnikov got home, his hair was soaked with sweat and he was +breathing heavily. He went rapidly up the stairs, walked into his +unlocked room and at once fastened the latch. 
Then in senseless terror +he rushed to the corner, to that hole under the paper where he had put +the things; put his hand in, and for some minutes felt carefully in the +hole, in every crack and fold of the paper. Finding nothing, he got up +and drew a deep breath. diff --git a/js/sdk/examples/data/sonia.txt b/js/sdk/examples/data/sonia.txt new file mode 100644 index 000000000..9a030fe0d --- /dev/null +++ b/js/sdk/examples/data/sonia.txt @@ -0,0 +1,39 @@ +On the canal bank near the bridge and not two houses away from the one +where Sonia lodged, there was a crowd of people, consisting principally +of gutter children. The hoarse broken voice of Katerina Ivanovna could +be heard from the bridge, and it certainly was a strange spectacle +likely to attract a street crowd. Katerina Ivanovna in her old dress +with the green shawl, wearing a torn straw hat, crushed in a hideous way +on one side, was really frantic. She was exhausted and breathless. Her +wasted consumptive face looked more suffering than ever, and indeed out +of doors in the sunshine a consumptive always looks worse than at home. +But her excitement did not flag, and every moment her irritation grew +more intense. She rushed at the children, shouted at them, coaxed +them, told them before the crowd how to dance and what to sing, began +explaining to them why it was necessary, and driven to desperation by +their not understanding, beat them.... Then she would make a rush at the +crowd; if she noticed any decently dressed person stopping to look, she +immediately appealed to him to see what these children “from a genteel, +one may say aristocratic, house” had been brought to. If she heard +laughter or jeering in the crowd, she would rush at once at the scoffers +and begin squabbling with them. Some people laughed, others shook their +heads, but everyone felt curious at the sight of the madwoman with the +frightened children. The frying-pan of which Lebeziatnikov had spoken +was not there, at least Raskolnikov did not see it. But instead of +rapping on the pan, Katerina Ivanovna began clapping her wasted hands, +when she made Lida and Kolya dance and Polenka sing. She too joined in +the singing, but broke down at the second note with a fearful cough, +which made her curse in despair and even shed tears. What made her most +furious was the weeping and terror of Kolya and Lida. Some effort had +been made to dress the children up as street singers are dressed. The +boy had on a turban made of something red and white to look like a Turk. +There had been no costume for Lida; she simply had a red knitted cap, +or rather a night cap that had belonged to Marmeladov, decorated with +a broken piece of white ostrich feather, which had been Katerina +Ivanovna’s grandmother’s and had been preserved as a family possession. +Polenka was in her everyday dress; she looked in timid perplexity at her +mother, and kept at her side, hiding her tears. She dimly realised her +mother’s condition, and looked uneasily about her. She was terribly +frightened of the street and the crowd. Sonia followed Katerina +Ivanovna, weeping and beseeching her to return home, but Katerina +Ivanovna was not to be persuaded. diff --git a/js/sdk/examples/data/zametov.txt b/js/sdk/examples/data/zametov.txt new file mode 100644 index 000000000..69b275f69 --- /dev/null +++ b/js/sdk/examples/data/zametov.txt @@ -0,0 +1,19 @@ +“How he keeps on! Are you afraid of having let out some secret? Don’t +worry yourself; you said nothing about a countess. 
But you said a lot +about a bulldog, and about ear-rings and chains, and about Krestovsky +Island, and some porter, and Nikodim Fomitch and Ilya Petrovitch, the +assistant superintendent. And another thing that was of special interest +to you was your own sock. You whined, ‘Give me my sock.’ Zametov +hunted all about your room for your socks, and with his own scented, +ring-bedecked fingers he gave you the rag. And only then were you +comforted, and for the next twenty-four hours you held the wretched +thing in your hand; we could not get it from you. It is most likely +somewhere under your quilt at this moment. And then you asked so +piteously for fringe for your trousers. We tried to find out what sort +of fringe, but we could not make it out. Now to business! Here are +thirty-five roubles; I take ten of them, and shall give you an account +of them in an hour or two. I will let Zossimov know at the same time, +though he ought to have been here long ago, for it is nearly twelve. And +you, Nastasya, look in pretty often while I am away, to see whether he +wants a drink or anything else. And I will tell Pashenka what is wanted +myself. Good-bye!” diff --git a/js/sdk/jest.config.js b/js/sdk/jest.config.js index 4499ebd11..ec90ea88f 100644 --- a/js/sdk/jest.config.js +++ b/js/sdk/jest.config.js @@ -5,4 +5,5 @@ module.exports = { "**/__tests__/**/*.ts?(x)", "**/__tests__/**/?(*.)+(spec|test).ts?(x)", ], + maxWorkers: 1, }; diff --git a/js/sdk/package-lock.json b/js/sdk/package-lock.json index 4f20317f0..fbd907005 100644 --- a/js/sdk/package-lock.json +++ b/js/sdk/package-lock.json @@ -1,12 +1,12 @@ { "name": "r2r-js", - "version": "0.3.17", + "version": "0.4.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "r2r-js", - "version": "0.3.17", + "version": "0.3.18", "license": "ISC", "dependencies": { "@jest/globals": "^29.7.0", diff --git a/js/sdk/package.json b/js/sdk/package.json index c7aa9cc71..6baae6150 100644 --- a/js/sdk/package.json +++ b/js/sdk/package.json @@ -1,6 +1,6 @@ { "name": "r2r-js", - "version": "0.3.17", + "version": "0.4.0", "description": "", "main": "dist/index.js", "browser": "dist/index.browser.js", diff --git a/js/sdk/pnpm-lock.yaml b/js/sdk/pnpm-lock.yaml index bc3bed2e7..28e0c9dce 100644 --- a/js/sdk/pnpm-lock.yaml +++ b/js/sdk/pnpm-lock.yaml @@ -1,17 +1,16 @@ -lockfileVersion: '9.0' +lockfileVersion: "9.0" settings: autoInstallPeers: true excludeLinksFromLockfile: false importers: - .: dependencies: - '@jest/globals': + "@jest/globals": specifier: ^29.7.0 version: 29.7.0 - '@rrweb/types': + "@rrweb/types": specifier: 2.0.0-alpha.17 version: 2.0.0-alpha.17 axios: @@ -33,16 +32,16 @@ importers: specifier: ^10.0.0 version: 10.0.0 devDependencies: - '@rrweb/record': + "@rrweb/record": specifier: 2.0.0-alpha.17 version: 2.0.0-alpha.17 - '@types/jest': + "@types/jest": specifier: ^29.5.13 version: 29.5.13 - '@types/node': + "@types/node": specifier: ^20.16.12 version: 20.16.14 - '@types/uuid': + "@types/uuid": specifier: ^10.0.0 version: 10.0.0 jest: @@ -62,556 +61,969 @@ importers: version: 5.6.3 packages: - - '@ampproject/remapping@2.3.0': - resolution: {integrity: sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==} - engines: {node: '>=6.0.0'} - - '@babel/code-frame@7.25.9': - resolution: {integrity: sha512-z88xeGxnzehn2sqZ8UdGQEvYErF1odv2CftxInpSYJt6uHuPe9YjahKZITGs3l5LeI9d2ROG+obuDAoSlqbNfQ==} - engines: {node: '>=6.9.0'} - - '@babel/compat-data@7.25.9': - resolution: {integrity: 
sha512-yD+hEuJ/+wAJ4Ox2/rpNv5HIuPG82x3ZlQvYVn8iYCprdxzE7P1udpGF1jyjQVBU4dgznN+k2h103vxZ7NdPyw==} - engines: {node: '>=6.9.0'} - - '@babel/core@7.25.9': - resolution: {integrity: sha512-WYvQviPw+Qyib0v92AwNIrdLISTp7RfDkM7bPqBvpbnhY4wq8HvHBZREVdYDXk98C8BkOIVnHAY3yvj7AVISxQ==} - engines: {node: '>=6.9.0'} - - '@babel/generator@7.25.9': - resolution: {integrity: sha512-omlUGkr5EaoIJrhLf9CJ0TvjBRpd9+AXRG//0GEQ9THSo8wPiTlbpy1/Ow8ZTrbXpjd9FHXfbFQx32I04ht0FA==} - engines: {node: '>=6.9.0'} - - '@babel/helper-compilation-targets@7.25.9': - resolution: {integrity: sha512-j9Db8Suy6yV/VHa4qzrj9yZfZxhLWQdVnRlXxmKLYlhWUVB1sB2G5sxuWYXk/whHD9iW76PmNzxZ4UCnTQTVEQ==} - engines: {node: '>=6.9.0'} - - '@babel/helper-module-imports@7.25.9': - resolution: {integrity: sha512-tnUA4RsrmflIM6W6RFTLFSXITtl0wKjgpnLgXyowocVPrbYrLUXSBXDgTs8BlbmIzIdlBySRQjINYs2BAkiLtw==} - engines: {node: '>=6.9.0'} - - '@babel/helper-module-transforms@7.25.9': - resolution: {integrity: sha512-TvLZY/F3+GvdRYFZFyxMvnsKi+4oJdgZzU3BoGN9Uc2d9C6zfNwJcKKhjqLAhK8i46mv93jsO74fDh3ih6rpHA==} - engines: {node: '>=6.9.0'} + "@ampproject/remapping@2.3.0": + resolution: + { + integrity: sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==, + } + engines: { node: ">=6.0.0" } + + "@babel/code-frame@7.25.9": + resolution: + { + integrity: sha512-z88xeGxnzehn2sqZ8UdGQEvYErF1odv2CftxInpSYJt6uHuPe9YjahKZITGs3l5LeI9d2ROG+obuDAoSlqbNfQ==, + } + engines: { node: ">=6.9.0" } + + "@babel/compat-data@7.25.9": + resolution: + { + integrity: sha512-yD+hEuJ/+wAJ4Ox2/rpNv5HIuPG82x3ZlQvYVn8iYCprdxzE7P1udpGF1jyjQVBU4dgznN+k2h103vxZ7NdPyw==, + } + engines: { node: ">=6.9.0" } + + "@babel/core@7.25.9": + resolution: + { + integrity: sha512-WYvQviPw+Qyib0v92AwNIrdLISTp7RfDkM7bPqBvpbnhY4wq8HvHBZREVdYDXk98C8BkOIVnHAY3yvj7AVISxQ==, + } + engines: { node: ">=6.9.0" } + + "@babel/generator@7.25.9": + resolution: + { + integrity: sha512-omlUGkr5EaoIJrhLf9CJ0TvjBRpd9+AXRG//0GEQ9THSo8wPiTlbpy1/Ow8ZTrbXpjd9FHXfbFQx32I04ht0FA==, + } + engines: { node: ">=6.9.0" } + + "@babel/helper-compilation-targets@7.25.9": + resolution: + { + integrity: sha512-j9Db8Suy6yV/VHa4qzrj9yZfZxhLWQdVnRlXxmKLYlhWUVB1sB2G5sxuWYXk/whHD9iW76PmNzxZ4UCnTQTVEQ==, + } + engines: { node: ">=6.9.0" } + + "@babel/helper-module-imports@7.25.9": + resolution: + { + integrity: sha512-tnUA4RsrmflIM6W6RFTLFSXITtl0wKjgpnLgXyowocVPrbYrLUXSBXDgTs8BlbmIzIdlBySRQjINYs2BAkiLtw==, + } + engines: { node: ">=6.9.0" } + + "@babel/helper-module-transforms@7.25.9": + resolution: + { + integrity: sha512-TvLZY/F3+GvdRYFZFyxMvnsKi+4oJdgZzU3BoGN9Uc2d9C6zfNwJcKKhjqLAhK8i46mv93jsO74fDh3ih6rpHA==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0 - - '@babel/helper-plugin-utils@7.25.9': - resolution: {integrity: sha512-kSMlyUVdWe25rEsRGviIgOWnoT/nfABVWlqt9N19/dIPWViAOW2s9wznP5tURbs/IDuNk4gPy3YdYRgH3uxhBw==} - engines: {node: '>=6.9.0'} - - '@babel/helper-simple-access@7.25.9': - resolution: {integrity: sha512-c6WHXuiaRsJTyHYLJV75t9IqsmTbItYfdj99PnzYGQZkYKvan5/2jKJ7gu31J3/BJ/A18grImSPModuyG/Eo0Q==} - engines: {node: '>=6.9.0'} - - '@babel/helper-string-parser@7.25.9': - resolution: {integrity: sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==} - engines: {node: '>=6.9.0'} - - '@babel/helper-validator-identifier@7.25.9': - resolution: {integrity: sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==} - engines: {node: '>=6.9.0'} - - '@babel/helper-validator-option@7.25.9': - 
resolution: {integrity: sha512-e/zv1co8pp55dNdEcCynfj9X7nyUKUXoUEwfXqaZt0omVOmDe9oOTdKStH4GmAw6zxMFs50ZayuMfHDKlO7Tfw==} - engines: {node: '>=6.9.0'} - - '@babel/helpers@7.25.9': - resolution: {integrity: sha512-oKWp3+usOJSzDZOucZUAMayhPz/xVjzymyDzUN8dk0Wd3RWMlGLXi07UCQ/CgQVb8LvXx3XBajJH4XGgkt7H7g==} - engines: {node: '>=6.9.0'} - - '@babel/highlight@7.25.9': - resolution: {integrity: sha512-llL88JShoCsth8fF8R4SJnIn+WLvR6ccFxu1H3FlMhDontdcmZWf2HgIZ7AIqV3Xcck1idlohrN4EUBQz6klbw==} - engines: {node: '>=6.9.0'} - - '@babel/parser@7.25.9': - resolution: {integrity: sha512-aI3jjAAO1fh7vY/pBGsn1i9LDbRP43+asrRlkPuTXW5yHXtd1NgTEMudbBoDDxrf1daEEfPJqR+JBMakzrR4Dg==} - engines: {node: '>=6.0.0'} + "@babel/core": ^7.0.0 + + "@babel/helper-plugin-utils@7.25.9": + resolution: + { + integrity: sha512-kSMlyUVdWe25rEsRGviIgOWnoT/nfABVWlqt9N19/dIPWViAOW2s9wznP5tURbs/IDuNk4gPy3YdYRgH3uxhBw==, + } + engines: { node: ">=6.9.0" } + + "@babel/helper-simple-access@7.25.9": + resolution: + { + integrity: sha512-c6WHXuiaRsJTyHYLJV75t9IqsmTbItYfdj99PnzYGQZkYKvan5/2jKJ7gu31J3/BJ/A18grImSPModuyG/Eo0Q==, + } + engines: { node: ">=6.9.0" } + + "@babel/helper-string-parser@7.25.9": + resolution: + { + integrity: sha512-4A/SCr/2KLd5jrtOMFzaKjVtAei3+2r/NChoBNoZ3EyP/+GlhoaEGoWOZUmFmoITP7zOJyHIMm+DYRd8o3PvHA==, + } + engines: { node: ">=6.9.0" } + + "@babel/helper-validator-identifier@7.25.9": + resolution: + { + integrity: sha512-Ed61U6XJc3CVRfkERJWDz4dJwKe7iLmmJsbOGu9wSloNSFttHV0I8g6UAgb7qnK5ly5bGLPd4oXZlxCdANBOWQ==, + } + engines: { node: ">=6.9.0" } + + "@babel/helper-validator-option@7.25.9": + resolution: + { + integrity: sha512-e/zv1co8pp55dNdEcCynfj9X7nyUKUXoUEwfXqaZt0omVOmDe9oOTdKStH4GmAw6zxMFs50ZayuMfHDKlO7Tfw==, + } + engines: { node: ">=6.9.0" } + + "@babel/helpers@7.25.9": + resolution: + { + integrity: sha512-oKWp3+usOJSzDZOucZUAMayhPz/xVjzymyDzUN8dk0Wd3RWMlGLXi07UCQ/CgQVb8LvXx3XBajJH4XGgkt7H7g==, + } + engines: { node: ">=6.9.0" } + + "@babel/highlight@7.25.9": + resolution: + { + integrity: sha512-llL88JShoCsth8fF8R4SJnIn+WLvR6ccFxu1H3FlMhDontdcmZWf2HgIZ7AIqV3Xcck1idlohrN4EUBQz6klbw==, + } + engines: { node: ">=6.9.0" } + + "@babel/parser@7.25.9": + resolution: + { + integrity: sha512-aI3jjAAO1fh7vY/pBGsn1i9LDbRP43+asrRlkPuTXW5yHXtd1NgTEMudbBoDDxrf1daEEfPJqR+JBMakzrR4Dg==, + } + engines: { node: ">=6.0.0" } hasBin: true - '@babel/plugin-syntax-async-generators@7.8.4': - resolution: {integrity: sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==} + "@babel/plugin-syntax-async-generators@7.8.4": + resolution: + { + integrity: sha512-tycmZxkGfZaxhMRbXlPXuVFpdWlXpir2W4AMhSJgRKzk/eDlIXOhb2LHWoLpDF7TEHylV5zNhykX6KAgHJmTNw==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 - '@babel/plugin-syntax-bigint@7.8.3': - resolution: {integrity: sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==} + "@babel/plugin-syntax-bigint@7.8.3": + resolution: + { + integrity: sha512-wnTnFlG+YxQm3vDxpGE57Pj0srRU4sHE/mDkt1qv2YJJSeUAec2ma4WLUnUPeKjyrfntVwe/N6dCXpU+zL3Npg==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 - '@babel/plugin-syntax-class-properties@7.12.13': - resolution: {integrity: sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==} + "@babel/plugin-syntax-class-properties@7.12.13": + resolution: + { + integrity: sha512-fm4idjKla0YahUNgFNLCB0qySdsoPiZP3iQE3rky0mBUtMZ23yDJ9SJdg6dXTSDnulOVqiF3Hgr9nbXvXTQZYA==, + } peerDependencies: - 
'@babel/core': ^7.0.0-0 - - '@babel/plugin-syntax-class-static-block@7.14.5': - resolution: {integrity: sha512-b+YyPmr6ldyNnM6sqYeMWE+bgJcJpO6yS4QD7ymxgH34GBPNDM/THBh8iunyvKIZztiwLH4CJZ0RxTk9emgpjw==} - engines: {node: '>=6.9.0'} + "@babel/core": ^7.0.0-0 + + "@babel/plugin-syntax-class-static-block@7.14.5": + resolution: + { + integrity: sha512-b+YyPmr6ldyNnM6sqYeMWE+bgJcJpO6yS4QD7ymxgH34GBPNDM/THBh8iunyvKIZztiwLH4CJZ0RxTk9emgpjw==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 - - '@babel/plugin-syntax-import-attributes@7.25.9': - resolution: {integrity: sha512-u3EN9ub8LyYvgTnrgp8gboElouayiwPdnM7x5tcnW3iSt09/lQYPwMNK40I9IUxo7QOZhAsPHCmmuO7EPdruqg==} - engines: {node: '>=6.9.0'} + "@babel/core": ^7.0.0-0 + + "@babel/plugin-syntax-import-attributes@7.25.9": + resolution: + { + integrity: sha512-u3EN9ub8LyYvgTnrgp8gboElouayiwPdnM7x5tcnW3iSt09/lQYPwMNK40I9IUxo7QOZhAsPHCmmuO7EPdruqg==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 - '@babel/plugin-syntax-import-meta@7.10.4': - resolution: {integrity: sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==} + "@babel/plugin-syntax-import-meta@7.10.4": + resolution: + { + integrity: sha512-Yqfm+XDx0+Prh3VSeEQCPU81yC+JWZ2pDPFSS4ZdpfZhp4MkFMaDC1UqseovEKwSUpnIL7+vK+Clp7bfh0iD7g==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 - '@babel/plugin-syntax-json-strings@7.8.3': - resolution: {integrity: sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==} + "@babel/plugin-syntax-json-strings@7.8.3": + resolution: + { + integrity: sha512-lY6kdGpWHvjoe2vk4WrAapEuBR69EMxZl+RoGRhrFGNYVK8mOPAW8VfbT/ZgrFbXlDNiiaxQnAtgVCZ6jv30EA==, + } peerDependencies: - '@babel/core': ^7.0.0-0 - - '@babel/plugin-syntax-jsx@7.25.9': - resolution: {integrity: sha512-ld6oezHQMZsZfp6pWtbjaNDF2tiiCYYDqQszHt5VV437lewP9aSi2Of99CK0D0XB21k7FLgnLcmQKyKzynfeAA==} - engines: {node: '>=6.9.0'} + "@babel/core": ^7.0.0-0 + + "@babel/plugin-syntax-jsx@7.25.9": + resolution: + { + integrity: sha512-ld6oezHQMZsZfp6pWtbjaNDF2tiiCYYDqQszHt5VV437lewP9aSi2Of99CK0D0XB21k7FLgnLcmQKyKzynfeAA==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 - '@babel/plugin-syntax-logical-assignment-operators@7.10.4': - resolution: {integrity: sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==} + "@babel/plugin-syntax-logical-assignment-operators@7.10.4": + resolution: + { + integrity: sha512-d8waShlpFDinQ5MtvGU9xDAOzKH47+FFoney2baFIoMr952hKOLp1HR7VszoZvOsV/4+RRszNY7D17ba0te0ig==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 - '@babel/plugin-syntax-nullish-coalescing-operator@7.8.3': - resolution: {integrity: sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==} + "@babel/plugin-syntax-nullish-coalescing-operator@7.8.3": + resolution: + { + integrity: sha512-aSff4zPII1u2QD7y+F8oDsz19ew4IGEJg9SVW+bqwpwtfFleiQDMdzA/R+UlWDzfnHFCxxleFT0PMIrR36XLNQ==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 - '@babel/plugin-syntax-numeric-separator@7.10.4': - resolution: {integrity: sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==} + "@babel/plugin-syntax-numeric-separator@7.10.4": + resolution: + { + integrity: 
sha512-9H6YdfkcK/uOnY/K7/aA2xpzaAgkQn37yzWUMRK7OaPOqOpGS1+n0H5hxT9AUw9EsSjPW8SVyMJwYRtWs3X3ug==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 - '@babel/plugin-syntax-object-rest-spread@7.8.3': - resolution: {integrity: sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==} + "@babel/plugin-syntax-object-rest-spread@7.8.3": + resolution: + { + integrity: sha512-XoqMijGZb9y3y2XskN+P1wUGiVwWZ5JmoDRwx5+3GmEplNyVM2s2Dg8ILFQm8rWM48orGy5YpI5Bl8U1y7ydlA==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 - '@babel/plugin-syntax-optional-catch-binding@7.8.3': - resolution: {integrity: sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==} + "@babel/plugin-syntax-optional-catch-binding@7.8.3": + resolution: + { + integrity: sha512-6VPD0Pc1lpTqw0aKoeRTMiB+kWhAoT24PA+ksWSBrFtl5SIRVpZlwN3NNPQjehA2E/91FV3RjLWoVTglWcSV3Q==, + } peerDependencies: - '@babel/core': ^7.0.0-0 + "@babel/core": ^7.0.0-0 - '@babel/plugin-syntax-optional-chaining@7.8.3': - resolution: {integrity: sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==} + "@babel/plugin-syntax-optional-chaining@7.8.3": + resolution: + { + integrity: sha512-KoK9ErH1MBlCPxV0VANkXW2/dw4vlbGDrFgz8bmUsBGYkFRcbRwMh6cIJubdPrkxRwuGdtCk0v/wPTKbQgBjkg==, + } peerDependencies: - '@babel/core': ^7.0.0-0 - - '@babel/plugin-syntax-private-property-in-object@7.14.5': - resolution: {integrity: sha512-0wVnp9dxJ72ZUJDV27ZfbSj6iHLoytYZmh3rFcxNnvsJF3ktkzLDZPy/mA17HGsaQT3/DQsWYX1f1QGWkCoVUg==} - engines: {node: '>=6.9.0'} + "@babel/core": ^7.0.0-0 + + "@babel/plugin-syntax-private-property-in-object@7.14.5": + resolution: + { + integrity: sha512-0wVnp9dxJ72ZUJDV27ZfbSj6iHLoytYZmh3rFcxNnvsJF3ktkzLDZPy/mA17HGsaQT3/DQsWYX1f1QGWkCoVUg==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 - - '@babel/plugin-syntax-top-level-await@7.14.5': - resolution: {integrity: sha512-hx++upLv5U1rgYfwe1xBQUhRmU41NEvpUvrp8jkrSCdvGSnM5/qdRMtylJ6PG5OFkBaHkbTAKTnd3/YyESRHFw==} - engines: {node: '>=6.9.0'} + "@babel/core": ^7.0.0-0 + + "@babel/plugin-syntax-top-level-await@7.14.5": + resolution: + { + integrity: sha512-hx++upLv5U1rgYfwe1xBQUhRmU41NEvpUvrp8jkrSCdvGSnM5/qdRMtylJ6PG5OFkBaHkbTAKTnd3/YyESRHFw==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 - - '@babel/plugin-syntax-typescript@7.25.9': - resolution: {integrity: sha512-hjMgRy5hb8uJJjUcdWunWVcoi9bGpJp8p5Ol1229PoN6aytsLwNMgmdftO23wnCLMfVmTwZDWMPNq/D1SY60JQ==} - engines: {node: '>=6.9.0'} + "@babel/core": ^7.0.0-0 + + "@babel/plugin-syntax-typescript@7.25.9": + resolution: + { + integrity: sha512-hjMgRy5hb8uJJjUcdWunWVcoi9bGpJp8p5Ol1229PoN6aytsLwNMgmdftO23wnCLMfVmTwZDWMPNq/D1SY60JQ==, + } + engines: { node: ">=6.9.0" } peerDependencies: - '@babel/core': ^7.0.0-0 - - '@babel/template@7.25.9': - resolution: {integrity: sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==} - engines: {node: '>=6.9.0'} - - '@babel/traverse@7.25.9': - resolution: {integrity: sha512-ZCuvfwOwlz/bawvAuvcj8rrithP2/N55Tzz342AkTvq4qaWbGfmCk/tKhNaV2cthijKrPAA8SRJV5WWe7IBMJw==} - engines: {node: '>=6.9.0'} - - '@babel/types@7.25.9': - resolution: {integrity: sha512-OwS2CM5KocvQ/k7dFJa8i5bNGJP0hXWfVCfDkqRFP1IreH1JDC7wG6eCYCi0+McbfT8OR/kNqsI0UU0xP9H6PQ==} - engines: {node: '>=6.9.0'} - - '@bcoe/v8-coverage@0.2.3': - resolution: {integrity: 
sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==} - - '@cspotcode/source-map-support@0.8.1': - resolution: {integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==} - engines: {node: '>=12'} - - '@istanbuljs/load-nyc-config@1.1.0': - resolution: {integrity: sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==} - engines: {node: '>=8'} - - '@istanbuljs/schema@0.1.3': - resolution: {integrity: sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==} - engines: {node: '>=8'} - - '@jest/console@29.7.0': - resolution: {integrity: sha512-5Ni4CU7XHQi32IJ398EEP4RrB8eV09sXP2ROqD4bksHrnTree52PsxvX8tpL8LvTZ3pFzXyPbNQReSN41CAhOg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jest/core@29.7.0': - resolution: {integrity: sha512-n7aeXWKMnGtDA48y8TLWJPJmLmmZ642Ceo78cYWEpiD7FzDgmNDV/GCVRorPABdXLJZ/9wzzgZAlHjXjxDHGsg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + "@babel/core": ^7.0.0-0 + + "@babel/template@7.25.9": + resolution: + { + integrity: sha512-9DGttpmPvIxBb/2uwpVo3dqJ+O6RooAFOS+lB+xDqoE2PVCE8nfoHMdZLpfCQRLwvohzXISPZcgxt80xLfsuwg==, + } + engines: { node: ">=6.9.0" } + + "@babel/traverse@7.25.9": + resolution: + { + integrity: sha512-ZCuvfwOwlz/bawvAuvcj8rrithP2/N55Tzz342AkTvq4qaWbGfmCk/tKhNaV2cthijKrPAA8SRJV5WWe7IBMJw==, + } + engines: { node: ">=6.9.0" } + + "@babel/types@7.25.9": + resolution: + { + integrity: sha512-OwS2CM5KocvQ/k7dFJa8i5bNGJP0hXWfVCfDkqRFP1IreH1JDC7wG6eCYCi0+McbfT8OR/kNqsI0UU0xP9H6PQ==, + } + engines: { node: ">=6.9.0" } + + "@bcoe/v8-coverage@0.2.3": + resolution: + { + integrity: sha512-0hYQ8SB4Db5zvZB4axdMHGwEaQjkZzFjQiN9LVYvIFB2nSUHW9tYpxWriPrWDASIxiaXax83REcLxuSdnGPZtw==, + } + + "@cspotcode/source-map-support@0.8.1": + resolution: + { + integrity: sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==, + } + engines: { node: ">=12" } + + "@istanbuljs/load-nyc-config@1.1.0": + resolution: + { + integrity: sha512-VjeHSlIzpv/NyD3N0YuHfXOPDIixcA1q2ZV98wsMqcYlPmv2n3Yb2lYP9XMElnaFVXg5A7YLTeLu6V84uQDjmQ==, + } + engines: { node: ">=8" } + + "@istanbuljs/schema@0.1.3": + resolution: + { + integrity: sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA==, + } + engines: { node: ">=8" } + + "@jest/console@29.7.0": + resolution: + { + integrity: sha512-5Ni4CU7XHQi32IJ398EEP4RrB8eV09sXP2ROqD4bksHrnTree52PsxvX8tpL8LvTZ3pFzXyPbNQReSN41CAhOg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/core@29.7.0": + resolution: + { + integrity: sha512-n7aeXWKMnGtDA48y8TLWJPJmLmmZ642Ceo78cYWEpiD7FzDgmNDV/GCVRorPABdXLJZ/9wzzgZAlHjXjxDHGsg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } peerDependencies: node-notifier: ^8.0.1 || ^9.0.0 || ^10.0.0 peerDependenciesMeta: node-notifier: optional: true - '@jest/environment@29.7.0': - resolution: {integrity: sha512-aQIfHDq33ExsN4jP1NWGXhxgQ/wixs60gDiKO+XVMd8Mn0NWPWgc34ZQDTb2jKaUWQ7MuwoitXAsN2XVXNMpAw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jest/expect-utils@29.7.0': - resolution: {integrity: sha512-GlsNBWiFQFCVi9QVSx7f5AgMeLxe9YCCs5PuP2O2LdjDAA8Jh9eX7lA1Jq/xdXw3Wb3hyvlFNfZIfcRetSzYcA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jest/expect@29.7.0': - resolution: {integrity: sha512-8uMeAMycttpva3P1lBHB8VciS9V0XAr3GymPpipdyQXbBcuhkLQOSe8E/p92RyAdToS6ZD1tFkX+CkhoECE0dQ==} - engines: {node: ^14.15.0 || 
^16.10.0 || >=18.0.0} - - '@jest/fake-timers@29.7.0': - resolution: {integrity: sha512-q4DH1Ha4TTFPdxLsqDXK1d3+ioSL7yL5oCMJZgDYm6i+6CygW5E5xVr/D1HdsGxjt1ZWSfUAs9OxSB/BNelWrQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jest/globals@29.7.0': - resolution: {integrity: sha512-mpiz3dutLbkW2MNFubUGUEVLkTGiqW6yLVTA+JbP6fI6J5iL9Y0Nlg8k95pcF8ctKwCS7WVxteBs29hhfAotzQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jest/reporters@29.7.0': - resolution: {integrity: sha512-DApq0KJbJOEzAFYjHADNNxAE3KbhxQB1y5Kplb5Waqw6zVbuWatSnMjE5gs8FUgEPmNsnZA3NCWl9NG0ia04Pg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + "@jest/environment@29.7.0": + resolution: + { + integrity: sha512-aQIfHDq33ExsN4jP1NWGXhxgQ/wixs60gDiKO+XVMd8Mn0NWPWgc34ZQDTb2jKaUWQ7MuwoitXAsN2XVXNMpAw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/expect-utils@29.7.0": + resolution: + { + integrity: sha512-GlsNBWiFQFCVi9QVSx7f5AgMeLxe9YCCs5PuP2O2LdjDAA8Jh9eX7lA1Jq/xdXw3Wb3hyvlFNfZIfcRetSzYcA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/expect@29.7.0": + resolution: + { + integrity: sha512-8uMeAMycttpva3P1lBHB8VciS9V0XAr3GymPpipdyQXbBcuhkLQOSe8E/p92RyAdToS6ZD1tFkX+CkhoECE0dQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/fake-timers@29.7.0": + resolution: + { + integrity: sha512-q4DH1Ha4TTFPdxLsqDXK1d3+ioSL7yL5oCMJZgDYm6i+6CygW5E5xVr/D1HdsGxjt1ZWSfUAs9OxSB/BNelWrQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/globals@29.7.0": + resolution: + { + integrity: sha512-mpiz3dutLbkW2MNFubUGUEVLkTGiqW6yLVTA+JbP6fI6J5iL9Y0Nlg8k95pcF8ctKwCS7WVxteBs29hhfAotzQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/reporters@29.7.0": + resolution: + { + integrity: sha512-DApq0KJbJOEzAFYjHADNNxAE3KbhxQB1y5Kplb5Waqw6zVbuWatSnMjE5gs8FUgEPmNsnZA3NCWl9NG0ia04Pg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } peerDependencies: node-notifier: ^8.0.1 || ^9.0.0 || ^10.0.0 peerDependenciesMeta: node-notifier: optional: true - '@jest/schemas@29.6.3': - resolution: {integrity: sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jest/source-map@29.6.3': - resolution: {integrity: sha512-MHjT95QuipcPrpLM+8JMSzFx6eHp5Bm+4XeFDJlwsvVBjmKNiIAvasGK2fxz2WbGRlnvqehFbh07MMa7n3YJnw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jest/test-result@29.7.0': - resolution: {integrity: sha512-Fdx+tv6x1zlkJPcWXmMDAG2HBnaR9XPSd5aDWQVsfrZmLVT3lU1cwyxLgRmXR9yrq4NBoEm9BMsfgFzTQAbJYA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jest/test-sequencer@29.7.0': - resolution: {integrity: sha512-GQwJ5WZVrKnOJuiYiAF52UNUJXgTZx1NHjFSEB0qEMmSZKAkdMoIzw/Cj6x6NF4AvV23AUqDpFzQkN/eYCYTxw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jest/transform@29.7.0': - resolution: {integrity: sha512-ok/BTPFzFKVMwO5eOHRrvnBVHdRy9IrsrW1GpMaQ9MCnilNLXQKmAX8s1YXDFaai9xJpac2ySzV0YeRRECr2Vw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jest/types@29.6.3': - resolution: {integrity: sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} - - '@jridgewell/gen-mapping@0.3.5': - resolution: {integrity: sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==} - engines: {node: '>=6.0.0'} - - '@jridgewell/resolve-uri@3.1.2': - resolution: {integrity: 
sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==} - engines: {node: '>=6.0.0'} - - '@jridgewell/set-array@1.2.1': - resolution: {integrity: sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==} - engines: {node: '>=6.0.0'} - - '@jridgewell/sourcemap-codec@1.5.0': - resolution: {integrity: sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==} - - '@jridgewell/trace-mapping@0.3.25': - resolution: {integrity: sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==} - - '@jridgewell/trace-mapping@0.3.9': - resolution: {integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==} - - '@rrweb/record@2.0.0-alpha.17': - resolution: {integrity: sha512-Je+lzjeWMF8/I0IDoXFzkGPKT8j7AkaBup5YcwUHlkp18VhLVze416MvI6915teE27uUA2ScXMXzG0Yiu5VTIw==} - - '@rrweb/types@2.0.0-alpha.17': - resolution: {integrity: sha512-AfDTVUuCyCaIG0lTSqYtrZqJX39ZEYzs4fYKnexhQ+id+kbZIpIJtaut5cto6dWZbB3SEe4fW0o90Po3LvTmfg==} - - '@rrweb/utils@2.0.0-alpha.17': - resolution: {integrity: sha512-HCsasPERBwOS9/LQeOytO2ETKTCqRj1wORBuxiy3t41hKhmi225DdrUPiWnyDdTQm1GdVbOymMRknJVPnZaSXw==} - - '@sinclair/typebox@0.27.8': - resolution: {integrity: sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==} - - '@sinonjs/commons@3.0.1': - resolution: {integrity: sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ==} - - '@sinonjs/fake-timers@10.3.0': - resolution: {integrity: sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==} - - '@tsconfig/node10@1.0.11': - resolution: {integrity: sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==} - - '@tsconfig/node12@1.0.11': - resolution: {integrity: sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==} - - '@tsconfig/node14@1.0.3': - resolution: {integrity: sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==} - - '@tsconfig/node16@1.0.4': - resolution: {integrity: sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==} - - '@types/babel__core@7.20.5': - resolution: {integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==} - - '@types/babel__generator@7.6.8': - resolution: {integrity: sha512-ASsj+tpEDsEiFr1arWrlN6V3mdfjRMZt6LtK/Vp/kreFLnr5QH5+DhvD5nINYZXzwJvXeGq+05iUXcAzVrqWtw==} - - '@types/babel__template@7.4.4': - resolution: {integrity: sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==} - - '@types/babel__traverse@7.20.6': - resolution: {integrity: sha512-r1bzfrm0tomOI8g1SzvCaQHo6Lcv6zu0EA+W2kHrt8dyrHQxGzBBL4kdkzIS+jBMV+EYcMAEAqXqYaLJq5rOZg==} - - '@types/css-font-loading-module@0.0.7': - resolution: {integrity: sha512-nl09VhutdjINdWyXxHWN/w9zlNCfr60JUqJbd24YXUuCwgeL0TpFSdElCwb6cxfB6ybE19Gjj4g0jsgkXxKv1Q==} - - '@types/graceful-fs@4.1.9': - resolution: {integrity: sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==} - - '@types/istanbul-lib-coverage@2.0.6': - resolution: {integrity: sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==} - - '@types/istanbul-lib-report@3.0.3': - resolution: {integrity: 
sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==} - - '@types/istanbul-reports@3.0.4': - resolution: {integrity: sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==} - - '@types/jest@29.5.13': - resolution: {integrity: sha512-wd+MVEZCHt23V0/L642O5APvspWply/rGY5BcW4SUETo2UzPU3Z26qr8jC2qxpimI2jjx9h7+2cj2FwIr01bXg==} - - '@types/node@20.16.14': - resolution: {integrity: sha512-vtgGzjxLF7QT88qRHtXMzCWpAAmwonE7fwgVjFtXosUva2oSpnIEc3gNO9P7uIfOxKnii2f79/xtOnfreYtDaA==} - - '@types/stack-utils@2.0.3': - resolution: {integrity: sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==} - - '@types/uuid@10.0.0': - resolution: {integrity: sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==} - - '@types/yargs-parser@21.0.3': - resolution: {integrity: sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==} - - '@types/yargs@17.0.33': - resolution: {integrity: sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA==} - - '@xstate/fsm@1.6.5': - resolution: {integrity: sha512-b5o1I6aLNeYlU/3CPlj/Z91ybk1gUsKT+5NAJI+2W4UjvS5KLG28K9v5UvNoFVjHV8PajVZ00RH3vnjyQO7ZAw==} + "@jest/schemas@29.6.3": + resolution: + { + integrity: sha512-mo5j5X+jIZmJQveBKeS/clAueipV7KgiX1vMgCxam1RNYiqE1w62n0/tJJnHtjW8ZHcQco5gY85jA3mi0L+nSA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/source-map@29.6.3": + resolution: + { + integrity: sha512-MHjT95QuipcPrpLM+8JMSzFx6eHp5Bm+4XeFDJlwsvVBjmKNiIAvasGK2fxz2WbGRlnvqehFbh07MMa7n3YJnw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/test-result@29.7.0": + resolution: + { + integrity: sha512-Fdx+tv6x1zlkJPcWXmMDAG2HBnaR9XPSd5aDWQVsfrZmLVT3lU1cwyxLgRmXR9yrq4NBoEm9BMsfgFzTQAbJYA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/test-sequencer@29.7.0": + resolution: + { + integrity: sha512-GQwJ5WZVrKnOJuiYiAF52UNUJXgTZx1NHjFSEB0qEMmSZKAkdMoIzw/Cj6x6NF4AvV23AUqDpFzQkN/eYCYTxw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/transform@29.7.0": + resolution: + { + integrity: sha512-ok/BTPFzFKVMwO5eOHRrvnBVHdRy9IrsrW1GpMaQ9MCnilNLXQKmAX8s1YXDFaai9xJpac2ySzV0YeRRECr2Vw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jest/types@29.6.3": + resolution: + { + integrity: sha512-u3UPsIilWKOM3F9CXtrG8LEJmNxwoCQC/XVj4IKYXvvpx7QIi/Kg1LI5uDmDpKlac62NUtX7eLjRh+jVZcLOzw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } + + "@jridgewell/gen-mapping@0.3.5": + resolution: + { + integrity: sha512-IzL8ZoEDIBRWEzlCcRhOaCupYyN5gdIK+Q6fbFdPDg6HqX6jpkItn7DFIpW9LQzXG6Df9sA7+OKnq0qlz/GaQg==, + } + engines: { node: ">=6.0.0" } + + "@jridgewell/resolve-uri@3.1.2": + resolution: + { + integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==, + } + engines: { node: ">=6.0.0" } + + "@jridgewell/set-array@1.2.1": + resolution: + { + integrity: sha512-R8gLRTZeyp03ymzP/6Lil/28tGeGEzhx1q2k703KGWRAI1VdvPIXdG70VJc2pAMw3NA6JKL5hhFu1sJX0Mnn/A==, + } + engines: { node: ">=6.0.0" } + + "@jridgewell/sourcemap-codec@1.5.0": + resolution: + { + integrity: sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==, + } + + "@jridgewell/trace-mapping@0.3.25": + resolution: + { + integrity: sha512-vNk6aEwybGtawWmy/PzwnGDOjCkLWSD2wqvjGGAgOAwCGWySYXfYoxt00IJkTF+8Lb57DwOb3Aa0o9CApepiYQ==, + 
} + + "@jridgewell/trace-mapping@0.3.9": + resolution: + { + integrity: sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==, + } + + "@rrweb/record@2.0.0-alpha.17": + resolution: + { + integrity: sha512-Je+lzjeWMF8/I0IDoXFzkGPKT8j7AkaBup5YcwUHlkp18VhLVze416MvI6915teE27uUA2ScXMXzG0Yiu5VTIw==, + } + + "@rrweb/types@2.0.0-alpha.17": + resolution: + { + integrity: sha512-AfDTVUuCyCaIG0lTSqYtrZqJX39ZEYzs4fYKnexhQ+id+kbZIpIJtaut5cto6dWZbB3SEe4fW0o90Po3LvTmfg==, + } + + "@rrweb/utils@2.0.0-alpha.17": + resolution: + { + integrity: sha512-HCsasPERBwOS9/LQeOytO2ETKTCqRj1wORBuxiy3t41hKhmi225DdrUPiWnyDdTQm1GdVbOymMRknJVPnZaSXw==, + } + + "@sinclair/typebox@0.27.8": + resolution: + { + integrity: sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==, + } + + "@sinonjs/commons@3.0.1": + resolution: + { + integrity: sha512-K3mCHKQ9sVh8o1C9cxkwxaOmXoAMlDxC1mYyHrjqOWEcBjYr76t96zL2zlj5dUGZ3HSw240X1qgH3Mjf1yJWpQ==, + } + + "@sinonjs/fake-timers@10.3.0": + resolution: + { + integrity: sha512-V4BG07kuYSUkTCSBHG8G8TNhM+F19jXFWnQtzj+we8DrkpSBCee9Z3Ms8yiGer/dlmhe35/Xdgyo3/0rQKg7YA==, + } + + "@tsconfig/node10@1.0.11": + resolution: + { + integrity: sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==, + } + + "@tsconfig/node12@1.0.11": + resolution: + { + integrity: sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==, + } + + "@tsconfig/node14@1.0.3": + resolution: + { + integrity: sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==, + } + + "@tsconfig/node16@1.0.4": + resolution: + { + integrity: sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==, + } + + "@types/babel__core@7.20.5": + resolution: + { + integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==, + } + + "@types/babel__generator@7.6.8": + resolution: + { + integrity: sha512-ASsj+tpEDsEiFr1arWrlN6V3mdfjRMZt6LtK/Vp/kreFLnr5QH5+DhvD5nINYZXzwJvXeGq+05iUXcAzVrqWtw==, + } + + "@types/babel__template@7.4.4": + resolution: + { + integrity: sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==, + } + + "@types/babel__traverse@7.20.6": + resolution: + { + integrity: sha512-r1bzfrm0tomOI8g1SzvCaQHo6Lcv6zu0EA+W2kHrt8dyrHQxGzBBL4kdkzIS+jBMV+EYcMAEAqXqYaLJq5rOZg==, + } + + "@types/css-font-loading-module@0.0.7": + resolution: + { + integrity: sha512-nl09VhutdjINdWyXxHWN/w9zlNCfr60JUqJbd24YXUuCwgeL0TpFSdElCwb6cxfB6ybE19Gjj4g0jsgkXxKv1Q==, + } + + "@types/graceful-fs@4.1.9": + resolution: + { + integrity: sha512-olP3sd1qOEe5dXTSaFvQG+02VdRXcdytWLAZsAq1PecU8uqQAhkrnbli7DagjtXKW/Bl7YJbUsa8MPcuc8LHEQ==, + } + + "@types/istanbul-lib-coverage@2.0.6": + resolution: + { + integrity: sha512-2QF/t/auWm0lsy8XtKVPG19v3sSOQlJe/YHZgfjb/KBBHOGSV+J2q/S671rcq9uTBrLAXmZpqJiaQbMT+zNU1w==, + } + + "@types/istanbul-lib-report@3.0.3": + resolution: + { + integrity: sha512-NQn7AHQnk/RSLOxrBbGyJM/aVQ+pjj5HCgasFxc0K/KhoATfQ/47AyUl15I2yBUpihjmas+a+VJBOqecrFH+uA==, + } + + "@types/istanbul-reports@3.0.4": + resolution: + { + integrity: sha512-pk2B1NWalF9toCRu6gjBzR69syFjP4Od8WRAX+0mmf9lAjCRicLOWc+ZrxZHx/0XRjotgkF9t6iaMJ+aXcOdZQ==, + } + + "@types/jest@29.5.13": + resolution: + { + integrity: sha512-wd+MVEZCHt23V0/L642O5APvspWply/rGY5BcW4SUETo2UzPU3Z26qr8jC2qxpimI2jjx9h7+2cj2FwIr01bXg==, + } + + "@types/node@20.16.14": + resolution: + { + 
integrity: sha512-vtgGzjxLF7QT88qRHtXMzCWpAAmwonE7fwgVjFtXosUva2oSpnIEc3gNO9P7uIfOxKnii2f79/xtOnfreYtDaA==, + } + + "@types/stack-utils@2.0.3": + resolution: + { + integrity: sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==, + } + + "@types/uuid@10.0.0": + resolution: + { + integrity: sha512-7gqG38EyHgyP1S+7+xomFtL+ZNHcKv6DwNaCZmJmo1vgMugyF3TCnXVg4t1uk89mLNwnLtnY3TpOpCOyp1/xHQ==, + } + + "@types/yargs-parser@21.0.3": + resolution: + { + integrity: sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==, + } + + "@types/yargs@17.0.33": + resolution: + { + integrity: sha512-WpxBCKWPLr4xSsHgz511rFJAM+wS28w2zEO1QDNY5zM/S8ok70NNfztH0xwhqKyaK0OHCbN98LDAZuy1ctxDkA==, + } + + "@xstate/fsm@1.6.5": + resolution: + { + integrity: sha512-b5o1I6aLNeYlU/3CPlj/Z91ybk1gUsKT+5NAJI+2W4UjvS5KLG28K9v5UvNoFVjHV8PajVZ00RH3vnjyQO7ZAw==, + } acorn-walk@8.3.4: - resolution: {integrity: sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==} - engines: {node: '>=0.4.0'} + resolution: + { + integrity: sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==, + } + engines: { node: ">=0.4.0" } acorn@8.13.0: - resolution: {integrity: sha512-8zSiw54Oxrdym50NlZ9sUusyO1Z1ZchgRLWRaK6c86XJFClyCgFKetdowBg5bKxyp/u+CDBJG4Mpp0m3HLZl9w==} - engines: {node: '>=0.4.0'} + resolution: + { + integrity: sha512-8zSiw54Oxrdym50NlZ9sUusyO1Z1ZchgRLWRaK6c86XJFClyCgFKetdowBg5bKxyp/u+CDBJG4Mpp0m3HLZl9w==, + } + engines: { node: ">=0.4.0" } hasBin: true ansi-escapes@4.3.2: - resolution: {integrity: sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-gKXj5ALrKWQLsYG9jlTRmR/xKluxHV+Z9QEwNIgCfM1/uwPMCuzVVnh5mwTd+OuBZcwSIMbqssNWRm1lE51QaQ==, + } + engines: { node: ">=8" } ansi-regex@5.0.1: - resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==, + } + engines: { node: ">=8" } ansi-styles@3.2.1: - resolution: {integrity: sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==, + } + engines: { node: ">=4" } ansi-styles@4.3.0: - resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==, + } + engines: { node: ">=8" } ansi-styles@5.2.0: - resolution: {integrity: sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==, + } + engines: { node: ">=10" } anymatch@3.1.3: - resolution: {integrity: sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==} - engines: {node: '>= 8'} + resolution: + { + integrity: sha512-KMReFUr0B4t+D+OBkjR3KYqvocp2XaSzO55UcB6mgQMd3KbcE+mWTyvVV7D/zsdEbNnV6acZUutkiHQXvTr1Rw==, + } + engines: { node: ">= 8" } 
arg@4.1.3: - resolution: {integrity: sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==} + resolution: + { + integrity: sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==, + } argparse@1.0.10: - resolution: {integrity: sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==} + resolution: + { + integrity: sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==, + } async@3.2.6: - resolution: {integrity: sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==} + resolution: + { + integrity: sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA==, + } asynckit@0.4.0: - resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==} + resolution: + { + integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==, + } axios@1.7.7: - resolution: {integrity: sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==} + resolution: + { + integrity: sha512-S4kL7XrjgBmvdGut0sN3yJxqYzrDOnivkBiN0OFs6hLiUam3UPvswUo0kqGyhqUZGEOytHyumEdXsAkgCOUf3Q==, + } babel-jest@29.7.0: - resolution: {integrity: sha512-BrvGY3xZSwEcCzKvKsCi2GgHqDqsYkOP4/by5xCgIwGXQxIEh+8ew3gmrE1y7XRR6LHZIj6yLYnUi/mm2KXKBg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-BrvGY3xZSwEcCzKvKsCi2GgHqDqsYkOP4/by5xCgIwGXQxIEh+8ew3gmrE1y7XRR6LHZIj6yLYnUi/mm2KXKBg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } peerDependencies: - '@babel/core': ^7.8.0 + "@babel/core": ^7.8.0 babel-plugin-istanbul@6.1.1: - resolution: {integrity: sha512-Y1IQok9821cC9onCx5otgFfRm7Lm+I+wwxOx738M/WLPZ9Q42m4IG5W0FNX8WLL2gYMZo3JkuXIH2DOpWM+qwA==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-Y1IQok9821cC9onCx5otgFfRm7Lm+I+wwxOx738M/WLPZ9Q42m4IG5W0FNX8WLL2gYMZo3JkuXIH2DOpWM+qwA==, + } + engines: { node: ">=8" } babel-plugin-jest-hoist@29.6.3: - resolution: {integrity: sha512-ESAc/RJvGTFEzRwOTT4+lNDk/GNHMkKbNzsvT0qKRfDyyYTskxB5rnU2njIDYVxXCBHHEI1c0YwHob3WaYujOg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-ESAc/RJvGTFEzRwOTT4+lNDk/GNHMkKbNzsvT0qKRfDyyYTskxB5rnU2njIDYVxXCBHHEI1c0YwHob3WaYujOg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } babel-preset-current-node-syntax@1.1.0: - resolution: {integrity: sha512-ldYss8SbBlWva1bs28q78Ju5Zq1F+8BrqBZZ0VFhLBvhh6lCpC2o3gDJi/5DRLs9FgYZCnmPYIVFU4lRXCkyUw==} + resolution: + { + integrity: sha512-ldYss8SbBlWva1bs28q78Ju5Zq1F+8BrqBZZ0VFhLBvhh6lCpC2o3gDJi/5DRLs9FgYZCnmPYIVFU4lRXCkyUw==, + } peerDependencies: - '@babel/core': ^7.0.0 + "@babel/core": ^7.0.0 babel-preset-jest@29.6.3: - resolution: {integrity: sha512-0B3bhxR6snWXJZtR/RliHTDPRgn1sNHOR0yVtq/IiQFyuOVjFS+wuio/R4gSNkyYmKmJB4wGZv2NZanmKmTnNA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-0B3bhxR6snWXJZtR/RliHTDPRgn1sNHOR0yVtq/IiQFyuOVjFS+wuio/R4gSNkyYmKmJB4wGZv2NZanmKmTnNA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } peerDependencies: - '@babel/core': ^7.0.0 + "@babel/core": ^7.0.0 balanced-match@1.0.2: - resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + resolution: + { + integrity: 
sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==, + } base64-arraybuffer@1.0.2: - resolution: {integrity: sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==} - engines: {node: '>= 0.6.0'} + resolution: + { + integrity: sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==, + } + engines: { node: ">= 0.6.0" } brace-expansion@1.1.11: - resolution: {integrity: sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==} + resolution: + { + integrity: sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==, + } brace-expansion@2.0.1: - resolution: {integrity: sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==} + resolution: + { + integrity: sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==, + } braces@3.0.3: - resolution: {integrity: sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==, + } + engines: { node: ">=8" } browserslist@4.24.2: - resolution: {integrity: sha512-ZIc+Q62revdMcqC6aChtW4jz3My3klmCO1fEmINZY/8J3EpBg5/A/D0AKmBveUh6pgoeycoMkVMko84tuYS+Gg==} - engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7} + resolution: + { + integrity: sha512-ZIc+Q62revdMcqC6aChtW4jz3My3klmCO1fEmINZY/8J3EpBg5/A/D0AKmBveUh6pgoeycoMkVMko84tuYS+Gg==, + } + engines: { node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7 } hasBin: true bs-logger@0.2.6: - resolution: {integrity: sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==} - engines: {node: '>= 6'} + resolution: + { + integrity: sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==, + } + engines: { node: ">= 6" } bser@2.1.1: - resolution: {integrity: sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==} + resolution: + { + integrity: sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==, + } buffer-from@1.1.2: - resolution: {integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==} + resolution: + { + integrity: sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ==, + } callsites@3.1.0: - resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==, + } + engines: { node: ">=6" } camelcase@5.3.1: - resolution: {integrity: sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-L28STB170nwWS63UjtlEOE3dldQApaJXZkOI1uMFfzf3rRuPegHaHesyee+YxQ+W6SvRDQV6UrdOdRiR153wJg==, + } + engines: { node: ">=6" } camelcase@6.3.0: - resolution: {integrity: sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==} - engines: {node: '>=10'} + resolution: + { + integrity: 
sha512-Gmy6FhYlCY7uOElZUSbxo2UCDH8owEk996gkbrpsgGtrJLM3J7jGxl9Ic7Qwwj4ivOE5AWZWRMecDdF7hqGjFA==, + } + engines: { node: ">=10" } caniuse-lite@1.0.30001669: - resolution: {integrity: sha512-DlWzFDJqstqtIVx1zeSpIMLjunf5SmwOw0N2Ck/QSQdS8PLS4+9HrLaYei4w8BIAL7IB/UEDu889d8vhCTPA0w==} + resolution: + { + integrity: sha512-DlWzFDJqstqtIVx1zeSpIMLjunf5SmwOw0N2Ck/QSQdS8PLS4+9HrLaYei4w8BIAL7IB/UEDu889d8vhCTPA0w==, + } chalk@2.4.2: - resolution: {integrity: sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==, + } + engines: { node: ">=4" } chalk@4.1.2: - resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==, + } + engines: { node: ">=10" } char-regex@1.0.2: - resolution: {integrity: sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw==, + } + engines: { node: ">=10" } ci-info@3.9.0: - resolution: {integrity: sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==, + } + engines: { node: ">=8" } cjs-module-lexer@1.4.1: - resolution: {integrity: sha512-cuSVIHi9/9E/+821Qjdvngor+xpnlwnuwIyZOaLmHBVdXL+gP+I6QQB9VkO7RI77YIcTV+S1W9AreJ5eN63JBA==} + resolution: + { + integrity: sha512-cuSVIHi9/9E/+821Qjdvngor+xpnlwnuwIyZOaLmHBVdXL+gP+I6QQB9VkO7RI77YIcTV+S1W9AreJ5eN63JBA==, + } cliui@8.0.1: - resolution: {integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==} - engines: {node: '>=12'} + resolution: + { + integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==, + } + engines: { node: ">=12" } co@4.6.0: - resolution: {integrity: sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==} - engines: {iojs: '>= 1.0.0', node: '>= 0.12.0'} + resolution: + { + integrity: sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==, + } + engines: { iojs: ">= 1.0.0", node: ">= 0.12.0" } collect-v8-coverage@1.0.2: - resolution: {integrity: sha512-lHl4d5/ONEbLlJvaJNtsF/Lz+WvB07u2ycqTYbdrq7UypDXailES4valYb2eWiJFxZlVmpGekfqoxQhzyFdT4Q==} + resolution: + { + integrity: sha512-lHl4d5/ONEbLlJvaJNtsF/Lz+WvB07u2ycqTYbdrq7UypDXailES4valYb2eWiJFxZlVmpGekfqoxQhzyFdT4Q==, + } color-convert@1.9.3: - resolution: {integrity: sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==} + resolution: + { + integrity: sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==, + } color-convert@2.0.1: - resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} - engines: {node: '>=7.0.0'} + resolution: + { + integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==, + } + engines: { node: 
">=7.0.0" } color-name@1.1.3: - resolution: {integrity: sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==} + resolution: + { + integrity: sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==, + } color-name@1.1.4: - resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} + resolution: + { + integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==, + } combined-stream@1.0.8: - resolution: {integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==} - engines: {node: '>= 0.8'} + resolution: + { + integrity: sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==, + } + engines: { node: ">= 0.8" } concat-map@0.0.1: - resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} + resolution: + { + integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==, + } convert-source-map@2.0.0: - resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==} + resolution: + { + integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==, + } core-js@3.38.1: - resolution: {integrity: sha512-OP35aUorbU3Zvlx7pjsFdu1rGNnD4pgw/CWoYzRY3t2EzoVT7shKHY1dlAy3f41cGIO7ZDPQimhGFTlEYkG/Hw==} + resolution: + { + integrity: sha512-OP35aUorbU3Zvlx7pjsFdu1rGNnD4pgw/CWoYzRY3t2EzoVT7shKHY1dlAy3f41cGIO7ZDPQimhGFTlEYkG/Hw==, + } create-jest@29.7.0: - resolution: {integrity: sha512-Adz2bdH0Vq3F53KEMJOoftQFutWCukm6J24wbPWRO4k1kMY7gS7ds/uoJkNuV8wDCtWWnuwGcJwpWcih+zEW1Q==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-Adz2bdH0Vq3F53KEMJOoftQFutWCukm6J24wbPWRO4k1kMY7gS7ds/uoJkNuV8wDCtWWnuwGcJwpWcih+zEW1Q==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } hasBin: true create-require@1.1.1: - resolution: {integrity: sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==} + resolution: + { + integrity: sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==, + } cross-spawn@7.0.3: - resolution: {integrity: sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==} - engines: {node: '>= 8'} + resolution: + { + integrity: sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==, + } + engines: { node: ">= 8" } debug@4.3.7: - resolution: {integrity: sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==} - engines: {node: '>=6.0'} + resolution: + { + integrity: sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==, + } + engines: { node: ">=6.0" } peerDependencies: - supports-color: '*' + supports-color: "*" peerDependenciesMeta: supports-color: optional: true dedent@1.5.3: - resolution: {integrity: sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==} + resolution: + { + integrity: sha512-NHQtfOOW68WD8lgypbLA5oT+Bt0xXJhiYvoR6SmmNXZfpzOGXwdKWmcwG8N7PwVVWV3eF/68nmD9BaJSsTBhyQ==, + } peerDependencies: babel-plugin-macros: ^3.1.0 peerDependenciesMeta: @@ -619,244 +1031,427 @@ packages: optional: true 
deepmerge@4.3.1: - resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==, + } + engines: { node: ">=0.10.0" } delayed-stream@1.0.0: - resolution: {integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==} - engines: {node: '>=0.4.0'} + resolution: + { + integrity: sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==, + } + engines: { node: ">=0.4.0" } detect-newline@3.1.0: - resolution: {integrity: sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-TLz+x/vEXm/Y7P7wn1EJFNLxYpUD4TgMosxY6fAVJUnJMbupHBOncxyWUG9OpTaH9EBD7uFI5LfEgmMOc54DsA==, + } + engines: { node: ">=8" } diff-sequences@29.6.3: - resolution: {integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-EjePK1srD3P08o2j4f0ExnylqRs5B9tJjcp9t1krH2qRi8CCdsYfwe9JgSLurFBWwq4uOlipzfk5fHNvwFKr8Q==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } diff@4.0.2: - resolution: {integrity: sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==} - engines: {node: '>=0.3.1'} + resolution: + { + integrity: sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==, + } + engines: { node: ">=0.3.1" } ejs@3.1.10: - resolution: {integrity: sha512-UeJmFfOrAQS8OJWPZ4qtgHyWExa088/MtK5UEyoJGFH67cDEXkZSviOiKRCZ4Xij0zxI3JECgYs3oKx+AizQBA==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-UeJmFfOrAQS8OJWPZ4qtgHyWExa088/MtK5UEyoJGFH67cDEXkZSviOiKRCZ4Xij0zxI3JECgYs3oKx+AizQBA==, + } + engines: { node: ">=0.10.0" } hasBin: true electron-to-chromium@1.5.42: - resolution: {integrity: sha512-gIfKavKDw1mhvic9nbzA5lZw8QSHpdMwLwXc0cWidQz9B15pDoDdDH4boIatuFfeoCatb3a/NGL6CYRVFxGZ9g==} + resolution: + { + integrity: sha512-gIfKavKDw1mhvic9nbzA5lZw8QSHpdMwLwXc0cWidQz9B15pDoDdDH4boIatuFfeoCatb3a/NGL6CYRVFxGZ9g==, + } emittery@0.13.1: - resolution: {integrity: sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==} - engines: {node: '>=12'} + resolution: + { + integrity: sha512-DeWwawk6r5yR9jFgnDKYt4sLS0LmHJJi3ZOnb5/JdbYwj3nW+FxQnHIjhBKz8YLC7oRNPVM9NQ47I3CVx34eqQ==, + } + engines: { node: ">=12" } emoji-regex@8.0.0: - resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} + resolution: + { + integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==, + } error-ex@1.3.2: - resolution: {integrity: sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==} + resolution: + { + integrity: sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==, + } escalade@3.2.0: - resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==, + } + engines: { node: ">=6" } 
escape-string-regexp@1.0.5: - resolution: {integrity: sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==} - engines: {node: '>=0.8.0'} + resolution: + { + integrity: sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==, + } + engines: { node: ">=0.8.0" } escape-string-regexp@2.0.0: - resolution: {integrity: sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-UpzcLCXolUWcNu5HtVMHYdXJjArjsF9C0aNnquZYY4uW/Vu0miy5YoWvbV345HauVvcAUnpRuhMMcqTcGOY2+w==, + } + engines: { node: ">=8" } esprima@4.0.1: - resolution: {integrity: sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==, + } + engines: { node: ">=4" } hasBin: true execa@5.1.1: - resolution: {integrity: sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==, + } + engines: { node: ">=10" } exit@0.1.2: - resolution: {integrity: sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ==} - engines: {node: '>= 0.8.0'} + resolution: + { + integrity: sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ==, + } + engines: { node: ">= 0.8.0" } expect@29.7.0: - resolution: {integrity: sha512-2Zks0hf1VLFYI1kbh0I5jP3KHHyCHpkfyHBzsSXRFgl/Bg9mWYfMW8oD+PdMPlEwy5HNsR9JutYy6pMeOh61nw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-2Zks0hf1VLFYI1kbh0I5jP3KHHyCHpkfyHBzsSXRFgl/Bg9mWYfMW8oD+PdMPlEwy5HNsR9JutYy6pMeOh61nw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } fast-json-stable-stringify@2.1.0: - resolution: {integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==} + resolution: + { + integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==, + } fb-watchman@2.0.2: - resolution: {integrity: sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==} + resolution: + { + integrity: sha512-p5161BqbuCaSnB8jIbzQHOlpgsPmK5rJVDfDKO91Axs5NC1uu3HRQm6wt9cd9/+GtQQIO53JdGXXoyDpTAsgYA==, + } fflate@0.4.8: - resolution: {integrity: sha512-FJqqoDBR00Mdj9ppamLa/Y7vxm+PRmNWA67N846RvsoYVMKB4q3y/de5PA7gUmRMYK/8CMz2GDZQmCRN1wBcWA==} + resolution: + { + integrity: sha512-FJqqoDBR00Mdj9ppamLa/Y7vxm+PRmNWA67N846RvsoYVMKB4q3y/de5PA7gUmRMYK/8CMz2GDZQmCRN1wBcWA==, + } filelist@1.0.4: - resolution: {integrity: sha512-w1cEuf3S+DrLCQL7ET6kz+gmlJdbq9J7yXCSjK/OZCPA+qEN1WyF4ZAf0YYJa4/shHJra2t/d/r8SV4Ji+x+8Q==} + resolution: + { + integrity: sha512-w1cEuf3S+DrLCQL7ET6kz+gmlJdbq9J7yXCSjK/OZCPA+qEN1WyF4ZAf0YYJa4/shHJra2t/d/r8SV4Ji+x+8Q==, + } fill-range@7.1.1: - resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==, + } + engines: { node: ">=8" } find-up@4.1.0: - resolution: {integrity: 
sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-PpOwAdQ/YlXQ2vj8a3h8IipDuYRi3wceVQQGYWxNINccq40Anw7BlsEXCMbt1Zt+OLA6Fq9suIpIWD0OsnISlw==, + } + engines: { node: ">=8" } follow-redirects@1.15.9: - resolution: {integrity: sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==} - engines: {node: '>=4.0'} + resolution: + { + integrity: sha512-gew4GsXizNgdoRyqmyfMHyAmXsZDk6mHkSxZFCzW9gwlbtOW44CDtYavM+y+72qD/Vq2l550kMF52DT8fOLJqQ==, + } + engines: { node: ">=4.0" } peerDependencies: - debug: '*' + debug: "*" peerDependenciesMeta: debug: optional: true form-data@4.0.1: - resolution: {integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==} - engines: {node: '>= 6'} + resolution: + { + integrity: sha512-tzN8e4TX8+kkxGPK8D5u0FNmjPUjw3lwC9lSLxxoB/+GtsJG91CO8bSWy73APlgAZzZbXEYZJuxjkHH2w+Ezhw==, + } + engines: { node: ">= 6" } fs.realpath@1.0.0: - resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==} + resolution: + { + integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==, + } fsevents@2.3.3: - resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==} - engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0} + resolution: + { + integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==, + } + engines: { node: ^8.16.0 || ^10.6.0 || >=11.0.0 } os: [darwin] function-bind@1.1.2: - resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} + resolution: + { + integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==, + } gensync@1.0.0-beta.2: - resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==} - engines: {node: '>=6.9.0'} + resolution: + { + integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==, + } + engines: { node: ">=6.9.0" } get-caller-file@2.0.5: - resolution: {integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==} - engines: {node: 6.* || 8.* || >= 10.*} + resolution: + { + integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==, + } + engines: { node: 6.* || 8.* || >= 10.* } get-package-type@0.1.0: - resolution: {integrity: sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==} - engines: {node: '>=8.0.0'} + resolution: + { + integrity: sha512-pjzuKtY64GYfWizNAJ0fr9VqttZkNiK2iS430LtIHzjBEr6bX8Am2zm4sW4Ro5wjWW5cAlRL1qAMTcXbjNAO2Q==, + } + engines: { node: ">=8.0.0" } get-stream@6.0.1: - resolution: {integrity: sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==, + } + engines: { node: ">=10" } glob@7.2.3: - resolution: {integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==} + resolution: + { + integrity: 
sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==, + } deprecated: Glob versions prior to v9 are no longer supported globals@11.12.0: - resolution: {integrity: sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==, + } + engines: { node: ">=4" } graceful-fs@4.2.11: - resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} + resolution: + { + integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==, + } has-flag@3.0.0: - resolution: {integrity: sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==, + } + engines: { node: ">=4" } has-flag@4.0.0: - resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==, + } + engines: { node: ">=8" } hasown@2.0.2: - resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} - engines: {node: '>= 0.4'} + resolution: + { + integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==, + } + engines: { node: ">= 0.4" } html-escaper@2.0.2: - resolution: {integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==} + resolution: + { + integrity: sha512-H2iMtd0I4Mt5eYiapRdIDjp+XzelXQ0tFE4JS7YFwFevXXMmOp9myNrUvCg0D6ws8iqkRPBfKHgbwig1SmlLfg==, + } human-signals@2.1.0: - resolution: {integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==} - engines: {node: '>=10.17.0'} + resolution: + { + integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==, + } + engines: { node: ">=10.17.0" } import-local@3.2.0: - resolution: {integrity: sha512-2SPlun1JUPWoM6t3F0dw0FkCF/jWY8kttcY4f599GLTSjh2OCuuhdTkJQsEcZzBqbXZGKMK2OqW1oZsjtf/gQA==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-2SPlun1JUPWoM6t3F0dw0FkCF/jWY8kttcY4f599GLTSjh2OCuuhdTkJQsEcZzBqbXZGKMK2OqW1oZsjtf/gQA==, + } + engines: { node: ">=8" } hasBin: true imurmurhash@0.1.4: - resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==} - engines: {node: '>=0.8.19'} + resolution: + { + integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==, + } + engines: { node: ">=0.8.19" } inflight@1.0.6: - resolution: {integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==} + resolution: + { + integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==, + } deprecated: This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful. 
inherits@2.0.4: - resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + resolution: + { + integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==, + } is-arrayish@0.2.1: - resolution: {integrity: sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==} + resolution: + { + integrity: sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==, + } is-core-module@2.15.1: - resolution: {integrity: sha512-z0vtXSwucUJtANQWldhbtbt7BnL0vxiFjIdDLAatwhDYty2bad6s+rijD6Ri4YuYJubLzIJLUidCh09e1djEVQ==} - engines: {node: '>= 0.4'} + resolution: + { + integrity: sha512-z0vtXSwucUJtANQWldhbtbt7BnL0vxiFjIdDLAatwhDYty2bad6s+rijD6Ri4YuYJubLzIJLUidCh09e1djEVQ==, + } + engines: { node: ">= 0.4" } is-fullwidth-code-point@3.0.0: - resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==, + } + engines: { node: ">=8" } is-generator-fn@2.1.0: - resolution: {integrity: sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-cTIB4yPYL/Grw0EaSzASzg6bBy9gqCofvWN8okThAYIxKJZC+udlRAmGbM0XLeniEJSs8uEgHPGuHSe1XsOLSQ==, + } + engines: { node: ">=6" } is-number@7.0.0: - resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==} - engines: {node: '>=0.12.0'} + resolution: + { + integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==, + } + engines: { node: ">=0.12.0" } is-stream@2.0.1: - resolution: {integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==, + } + engines: { node: ">=8" } isexe@2.0.0: - resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} + resolution: + { + integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==, + } istanbul-lib-coverage@3.2.2: - resolution: {integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-O8dpsF+r0WV/8MNRKfnmrtCWhuKjxrq2w+jpzBL5UZKTi2LeVWnWOmWRxFlesJONmc+wLAGvKQZEOanko0LFTg==, + } + engines: { node: ">=8" } istanbul-lib-instrument@5.2.1: - resolution: {integrity: sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-pzqtp31nLv/XFOzXGuvhCb8qhjmTVo5vjVk19XE4CRlSWz0KoeJ3bw9XsA7nOp9YBf4qHjwBxkDzKcME/J29Yg==, + } + engines: { node: ">=8" } istanbul-lib-instrument@6.0.3: - resolution: {integrity: sha512-Vtgk7L/R2JHyyGW07spoFlB8/lpjiOLTjMdms6AFMraYt3BaJauod/NGrfnVG/y4Ix1JEuMRPDPEj2ua+zz1/Q==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-Vtgk7L/R2JHyyGW07spoFlB8/lpjiOLTjMdms6AFMraYt3BaJauod/NGrfnVG/y4Ix1JEuMRPDPEj2ua+zz1/Q==, + } + engines: { node: ">=10" } istanbul-lib-report@3.0.1: - resolution: {integrity: 
sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-GCfE1mtsHGOELCU8e/Z7YWzpmybrx/+dSTfLrvY8qRmaY6zXTKWn6WQIjaAFw069icm6GVMNkgu0NzI4iPZUNw==, + } + engines: { node: ">=10" } istanbul-lib-source-maps@4.0.1: - resolution: {integrity: sha512-n3s8EwkdFIJCG3BPKBYvskgXGoy88ARzvegkitk60NxRdwltLOTaH7CUiMRXvwYorl0Q712iEjcWB+fK/MrWVw==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-n3s8EwkdFIJCG3BPKBYvskgXGoy88ARzvegkitk60NxRdwltLOTaH7CUiMRXvwYorl0Q712iEjcWB+fK/MrWVw==, + } + engines: { node: ">=10" } istanbul-reports@3.1.7: - resolution: {integrity: sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-BewmUXImeuRk2YY0PVbxgKAysvhRPUQE0h5QRM++nVWyubKGV0l8qQ5op8+B2DOmwSe63Jivj0BjkPQVf8fP5g==, + } + engines: { node: ">=8" } jake@10.9.2: - resolution: {integrity: sha512-2P4SQ0HrLQ+fw6llpLnOaGAvN2Zu6778SJMrCUwns4fOoG9ayrTiZk3VV8sCPkVZF8ab0zksVpS8FDY5pRCNBA==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-2P4SQ0HrLQ+fw6llpLnOaGAvN2Zu6778SJMrCUwns4fOoG9ayrTiZk3VV8sCPkVZF8ab0zksVpS8FDY5pRCNBA==, + } + engines: { node: ">=10" } hasBin: true jest-changed-files@29.7.0: - resolution: {integrity: sha512-fEArFiwf1BpQ+4bXSprcDc3/x4HSzL4al2tozwVpDFpsxALjLYdyiIK4e5Vz66GQJIbXJ82+35PtysofptNX2w==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-fEArFiwf1BpQ+4bXSprcDc3/x4HSzL4al2tozwVpDFpsxALjLYdyiIK4e5Vz66GQJIbXJ82+35PtysofptNX2w==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-circus@29.7.0: - resolution: {integrity: sha512-3E1nCMgipcTkCocFwM90XXQab9bS+GMsjdpmPrlelaxwD93Ad8iVEjX/vvHPdLPnFf+L40u+5+iutRdA1N9myw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-3E1nCMgipcTkCocFwM90XXQab9bS+GMsjdpmPrlelaxwD93Ad8iVEjX/vvHPdLPnFf+L40u+5+iutRdA1N9myw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-cli@29.7.0: - resolution: {integrity: sha512-OVVobw2IubN/GSYsxETi+gOe7Ka59EFMR/twOU3Jb2GnKKeMGJB5SGUUrEz3SFVmJASUdZUzy83sLNNQ2gZslg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-OVVobw2IubN/GSYsxETi+gOe7Ka59EFMR/twOU3Jb2GnKKeMGJB5SGUUrEz3SFVmJASUdZUzy83sLNNQ2gZslg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } hasBin: true peerDependencies: node-notifier: ^8.0.1 || ^9.0.0 || ^10.0.0 @@ -865,109 +1460,178 @@ packages: optional: true jest-config@29.7.0: - resolution: {integrity: sha512-uXbpfeQ7R6TZBqI3/TxCU4q4ttk3u0PJeC+E0zbfSoSjq6bJ7buBPxzQPL0ifrkY4DNu4JUdk0ImlBUYi840eQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-uXbpfeQ7R6TZBqI3/TxCU4q4ttk3u0PJeC+E0zbfSoSjq6bJ7buBPxzQPL0ifrkY4DNu4JUdk0ImlBUYi840eQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } peerDependencies: - '@types/node': '*' - ts-node: '>=9.0.0' + "@types/node": "*" + ts-node: ">=9.0.0" peerDependenciesMeta: - '@types/node': + "@types/node": optional: true ts-node: optional: true jest-diff@29.7.0: - resolution: {integrity: sha512-LMIgiIrhigmPrs03JHpxUh2yISK3vLFPkAodPeo0+BuF7wA2FoQbkEg1u8gBYBThncu7e1oEDUfIXVuTqLRUjw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-LMIgiIrhigmPrs03JHpxUh2yISK3vLFPkAodPeo0+BuF7wA2FoQbkEg1u8gBYBThncu7e1oEDUfIXVuTqLRUjw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-docblock@29.7.0: - resolution: 
{integrity: sha512-q617Auw3A612guyaFgsbFeYpNP5t2aoUNLwBUbc/0kD1R4t9ixDbyFTHd1nok4epoVFpr7PmeWHrhvuV3XaJ4g==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-q617Auw3A612guyaFgsbFeYpNP5t2aoUNLwBUbc/0kD1R4t9ixDbyFTHd1nok4epoVFpr7PmeWHrhvuV3XaJ4g==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-each@29.7.0: - resolution: {integrity: sha512-gns+Er14+ZrEoC5fhOfYCY1LOHHr0TI+rQUHZS8Ttw2l7gl+80eHc/gFf2Ktkw0+SIACDTeWvpFcv3B04VembQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-gns+Er14+ZrEoC5fhOfYCY1LOHHr0TI+rQUHZS8Ttw2l7gl+80eHc/gFf2Ktkw0+SIACDTeWvpFcv3B04VembQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-environment-node@29.7.0: - resolution: {integrity: sha512-DOSwCRqXirTOyheM+4d5YZOrWcdu0LNZ87ewUoywbcb2XR4wKgqiG8vNeYwhjFMbEkfju7wx2GYH0P2gevGvFw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-DOSwCRqXirTOyheM+4d5YZOrWcdu0LNZ87ewUoywbcb2XR4wKgqiG8vNeYwhjFMbEkfju7wx2GYH0P2gevGvFw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-get-type@29.6.3: - resolution: {integrity: sha512-zrteXnqYxfQh7l5FHyL38jL39di8H8rHoecLH3JNxH3BwOrBsNeabdap5e0I23lD4HHI8W5VFBZqG4Eaq5LNcw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-zrteXnqYxfQh7l5FHyL38jL39di8H8rHoecLH3JNxH3BwOrBsNeabdap5e0I23lD4HHI8W5VFBZqG4Eaq5LNcw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-haste-map@29.7.0: - resolution: {integrity: sha512-fP8u2pyfqx0K1rGn1R9pyE0/KTn+G7PxktWidOBTqFPLYX0b9ksaMFkhK5vrS3DVun09pckLdlx90QthlW7AmA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-fP8u2pyfqx0K1rGn1R9pyE0/KTn+G7PxktWidOBTqFPLYX0b9ksaMFkhK5vrS3DVun09pckLdlx90QthlW7AmA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-leak-detector@29.7.0: - resolution: {integrity: sha512-kYA8IJcSYtST2BY9I+SMC32nDpBT3J2NvWJx8+JCuCdl/CR1I4EKUJROiP8XtCcxqgTTBGJNdbB1A8XRKbTetw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-kYA8IJcSYtST2BY9I+SMC32nDpBT3J2NvWJx8+JCuCdl/CR1I4EKUJROiP8XtCcxqgTTBGJNdbB1A8XRKbTetw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-matcher-utils@29.7.0: - resolution: {integrity: sha512-sBkD+Xi9DtcChsI3L3u0+N0opgPYnCRPtGcQYrgXmR+hmt/fYfWAL0xRXYU8eWOdfuLgBe0YCW3AFtnRLagq/g==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-sBkD+Xi9DtcChsI3L3u0+N0opgPYnCRPtGcQYrgXmR+hmt/fYfWAL0xRXYU8eWOdfuLgBe0YCW3AFtnRLagq/g==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-message-util@29.7.0: - resolution: {integrity: sha512-GBEV4GRADeP+qtB2+6u61stea8mGcOT4mCtrYISZwfu9/ISHFJ/5zOMXYbpBE9RsS5+Gb63DW4FgmnKJ79Kf6w==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-GBEV4GRADeP+qtB2+6u61stea8mGcOT4mCtrYISZwfu9/ISHFJ/5zOMXYbpBE9RsS5+Gb63DW4FgmnKJ79Kf6w==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-mock@29.7.0: - resolution: {integrity: sha512-ITOMZn+UkYS4ZFh83xYAOzWStloNzJFO2s8DWrE4lhtGD+AorgnbkiKERe4wQVBydIGPx059g6riW5Btp6Llnw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-ITOMZn+UkYS4ZFh83xYAOzWStloNzJFO2s8DWrE4lhtGD+AorgnbkiKERe4wQVBydIGPx059g6riW5Btp6Llnw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-pnp-resolver@1.2.3: - resolution: {integrity: 
sha512-+3NpwQEnRoIBtx4fyhblQDPgJI0H1IEIkX7ShLUjPGA7TtUTvI1oiKi3SR4oBR0hQhQR80l4WAe5RrXBwWMA8w==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-+3NpwQEnRoIBtx4fyhblQDPgJI0H1IEIkX7ShLUjPGA7TtUTvI1oiKi3SR4oBR0hQhQR80l4WAe5RrXBwWMA8w==, + } + engines: { node: ">=6" } peerDependencies: - jest-resolve: '*' + jest-resolve: "*" peerDependenciesMeta: jest-resolve: optional: true jest-regex-util@29.6.3: - resolution: {integrity: sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-KJJBsRCyyLNWCNBOvZyRDnAIfUiRJ8v+hOBQYGn8gDyF3UegwiP4gwRR3/SDa42g1YbVycTidUF3rKjyLFDWbg==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-resolve-dependencies@29.7.0: - resolution: {integrity: sha512-un0zD/6qxJ+S0et7WxeI3H5XSe9lTBBR7bOHCHXkKR6luG5mwDDlIzVQ0V5cZCuoTgEdcdwzTghYkTWfubi+nA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-un0zD/6qxJ+S0et7WxeI3H5XSe9lTBBR7bOHCHXkKR6luG5mwDDlIzVQ0V5cZCuoTgEdcdwzTghYkTWfubi+nA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-resolve@29.7.0: - resolution: {integrity: sha512-IOVhZSrg+UvVAshDSDtHyFCCBUl/Q3AAJv8iZ6ZjnZ74xzvwuzLXid9IIIPgTnY62SJjfuupMKZsZQRsCvxEgA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-IOVhZSrg+UvVAshDSDtHyFCCBUl/Q3AAJv8iZ6ZjnZ74xzvwuzLXid9IIIPgTnY62SJjfuupMKZsZQRsCvxEgA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-runner@29.7.0: - resolution: {integrity: sha512-fsc4N6cPCAahybGBfTRcq5wFR6fpLznMg47sY5aDpsoejOcVYFb07AHuSnR0liMcPTgBsA3ZJL6kFOjPdoNipQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-fsc4N6cPCAahybGBfTRcq5wFR6fpLznMg47sY5aDpsoejOcVYFb07AHuSnR0liMcPTgBsA3ZJL6kFOjPdoNipQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-runtime@29.7.0: - resolution: {integrity: sha512-gUnLjgwdGqW7B4LvOIkbKs9WGbn+QLqRQQ9juC6HndeDiezIwhDP+mhMwHWCEcfQ5RUXa6OPnFF8BJh5xegwwQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-gUnLjgwdGqW7B4LvOIkbKs9WGbn+QLqRQQ9juC6HndeDiezIwhDP+mhMwHWCEcfQ5RUXa6OPnFF8BJh5xegwwQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-snapshot@29.7.0: - resolution: {integrity: sha512-Rm0BMWtxBcioHr1/OX5YCP8Uov4riHvKPknOGs804Zg9JGZgmIBkbtlxJC/7Z4msKYVbIJtfU+tKb8xlYNfdkw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-Rm0BMWtxBcioHr1/OX5YCP8Uov4riHvKPknOGs804Zg9JGZgmIBkbtlxJC/7Z4msKYVbIJtfU+tKb8xlYNfdkw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-util@29.7.0: - resolution: {integrity: sha512-z6EbKajIpqGKU56y5KBUgy1dt1ihhQJgWzUlZHArA/+X2ad7Cb5iF+AK1EWVL/Bo7Rz9uurpqw6SiBCefUbCGA==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-z6EbKajIpqGKU56y5KBUgy1dt1ihhQJgWzUlZHArA/+X2ad7Cb5iF+AK1EWVL/Bo7Rz9uurpqw6SiBCefUbCGA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-validate@29.7.0: - resolution: {integrity: sha512-ZB7wHqaRGVw/9hST/OuFUReG7M8vKeq0/J2egIGLdvjHCmYqGARhzXmtgi+gVeZ5uXFF219aOc3Ls2yLg27tkw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-ZB7wHqaRGVw/9hST/OuFUReG7M8vKeq0/J2egIGLdvjHCmYqGARhzXmtgi+gVeZ5uXFF219aOc3Ls2yLg27tkw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-watcher@29.7.0: - resolution: {integrity: 
sha512-49Fg7WXkU3Vl2h6LbLtMQ/HyB6rXSIX7SqvBLQmssRBGN9I0PNvPmAmCWSOY6SOvrjhI/F7/bGAv9RtnsPA03g==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-49Fg7WXkU3Vl2h6LbLtMQ/HyB6rXSIX7SqvBLQmssRBGN9I0PNvPmAmCWSOY6SOvrjhI/F7/bGAv9RtnsPA03g==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest-worker@29.7.0: - resolution: {integrity: sha512-eIz2msL/EzL9UFTFFx7jBTkeZfku0yUAyZZZmJ93H2TYEiroIx2PQjEXcwYtYl8zXCxb+PAmA2hLIt/6ZEkPHw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-eIz2msL/EzL9UFTFFx7jBTkeZfku0yUAyZZZmJ93H2TYEiroIx2PQjEXcwYtYl8zXCxb+PAmA2hLIt/6ZEkPHw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } jest@29.7.0: - resolution: {integrity: sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } hasBin: true peerDependencies: node-notifier: ^8.0.1 || ^9.0.0 || ^10.0.0 @@ -976,352 +1640,622 @@ packages: optional: true js-tokens@4.0.0: - resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==} + resolution: + { + integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==, + } js-yaml@3.14.1: - resolution: {integrity: sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==} + resolution: + { + integrity: sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g==, + } hasBin: true jsesc@3.0.2: - resolution: {integrity: sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-xKqzzWXDttJuOcawBt4KnKHHIf5oQ/Cxax+0PWFG+DFDgHNAdi+TXECADI+RYiFUMmx8792xsMbbgXj4CwnP4g==, + } + engines: { node: ">=6" } hasBin: true json-parse-even-better-errors@2.3.1: - resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==} + resolution: + { + integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==, + } json5@2.2.3: - resolution: {integrity: sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==, + } + engines: { node: ">=6" } hasBin: true kleur@3.0.3: - resolution: {integrity: sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-eTIzlVOSUR+JxdDFepEYcBMtZ9Qqdef+rnzWdRZuMbOywu5tO2w2N7rqjoANZ5k9vywhL6Br1VRjUIgTQx4E8w==, + } + engines: { node: ">=6" } leven@3.1.0: - resolution: {integrity: sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A==, + } + engines: { node: ">=6" } lines-and-columns@1.2.4: - resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==} + resolution: + { + integrity: 
sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==, + } locate-path@5.0.0: - resolution: {integrity: sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-t7hw9pI+WvuwNJXwk5zVHpyhIqzg2qTlklJOf0mVxGSbe3Fp2VieZcduNYjaLDoy6p9uGpQEGWG87WpMKlNq8g==, + } + engines: { node: ">=8" } lodash.memoize@4.1.2: - resolution: {integrity: sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==} + resolution: + { + integrity: sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==, + } lru-cache@5.1.1: - resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==} + resolution: + { + integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==, + } make-dir@4.0.0: - resolution: {integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-hXdUTZYIVOt1Ex//jAQi+wTZZpUpwBj/0QsOzqegb3rGMMeJiSEu5xLHnYfBrRV4RH2+OCSOO95Is/7x1WJ4bw==, + } + engines: { node: ">=10" } make-error@1.3.6: - resolution: {integrity: sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==} + resolution: + { + integrity: sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==, + } makeerror@1.0.12: - resolution: {integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==} + resolution: + { + integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==, + } merge-stream@2.0.0: - resolution: {integrity: sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==} + resolution: + { + integrity: sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==, + } micromatch@4.0.8: - resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==} - engines: {node: '>=8.6'} + resolution: + { + integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==, + } + engines: { node: ">=8.6" } mime-db@1.52.0: - resolution: {integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==} - engines: {node: '>= 0.6'} + resolution: + { + integrity: sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==, + } + engines: { node: ">= 0.6" } mime-types@2.1.35: - resolution: {integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==} - engines: {node: '>= 0.6'} + resolution: + { + integrity: sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==, + } + engines: { node: ">= 0.6" } mimic-fn@2.1.0: - resolution: {integrity: sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==, + } + engines: { node: ">=6" } minimatch@3.1.2: - resolution: {integrity: 
sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==} + resolution: + { + integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==, + } minimatch@5.1.6: - resolution: {integrity: sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-lKwV/1brpG6mBUFHtb7NUmtABCb2WZZmm2wNiOA5hAb8VdCS4B3dtMWyvcoViccwAW/COERjXLt0zP1zXUN26g==, + } + engines: { node: ">=10" } mitt@3.0.1: - resolution: {integrity: sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==} + resolution: + { + integrity: sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==, + } ms@2.1.3: - resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + resolution: + { + integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==, + } nanoid@3.3.7: - resolution: {integrity: sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==} - engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1} + resolution: + { + integrity: sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==, + } + engines: { node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1 } hasBin: true natural-compare@1.4.0: - resolution: {integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==} + resolution: + { + integrity: sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==, + } node-int64@0.4.0: - resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==} + resolution: + { + integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==, + } node-releases@2.0.18: - resolution: {integrity: sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g==} + resolution: + { + integrity: sha512-d9VeXT4SJ7ZeOqGX6R5EM022wpL+eWPooLI+5UpWn2jCT1aosUQEhQP214x33Wkwx3JQMvIm+tIoVOdodFS40g==, + } normalize-path@3.0.0: - resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==, + } + engines: { node: ">=0.10.0" } npm-run-path@4.0.1: - resolution: {integrity: sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==, + } + engines: { node: ">=8" } once@1.4.0: - resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} + resolution: + { + integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==, + } onetime@5.1.2: - resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==, + } + engines: 
{ node: ">=6" } p-limit@2.3.0: - resolution: {integrity: sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-//88mFWSJx8lxCzwdAABTJL2MyWB12+eIY7MDL2SqLmAkeKU9qxRvWuSyTjm3FUmpBEMuFfckAIqEaVGUDxb6w==, + } + engines: { node: ">=6" } p-limit@3.1.0: - resolution: {integrity: sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==, + } + engines: { node: ">=10" } p-locate@4.1.0: - resolution: {integrity: sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-R79ZZ/0wAxKGu3oYMlz8jy/kbhsNrS7SKZ7PxEHBgJ5+F2mtFW2fK2cOtBh1cHYkQsbzFV7I+EoRKe6Yt0oK7A==, + } + engines: { node: ">=8" } p-try@2.2.0: - resolution: {integrity: sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ==, + } + engines: { node: ">=6" } parse-json@5.2.0: - resolution: {integrity: sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==, + } + engines: { node: ">=8" } path-exists@4.0.0: - resolution: {integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==, + } + engines: { node: ">=8" } path-is-absolute@1.0.1: - resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==, + } + engines: { node: ">=0.10.0" } path-key@3.1.1: - resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==, + } + engines: { node: ">=8" } path-parse@1.0.7: - resolution: {integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==} + resolution: + { + integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==, + } picocolors@1.1.1: - resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==} + resolution: + { + integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==, + } picomatch@2.3.1: - resolution: {integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==} - engines: {node: '>=8.6'} + resolution: + { + integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==, + } + engines: { node: ">=8.6" } pirates@4.0.6: - resolution: {integrity: 
sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==} - engines: {node: '>= 6'} + resolution: + { + integrity: sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg==, + } + engines: { node: ">= 6" } pkg-dir@4.2.0: - resolution: {integrity: sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-HRDzbaKjC+AOWVXxAU/x54COGeIv9eb+6CkDSQoNTt4XyWoIJvuPsXizxu/Fr23EiekbtZwmh1IcIG/l/a10GQ==, + } + engines: { node: ">=8" } postcss@8.4.47: - resolution: {integrity: sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==} - engines: {node: ^10 || ^12 || >=14} + resolution: + { + integrity: sha512-56rxCq7G/XfB4EkXq9Egn5GCqugWvDFjafDOThIdMBsI15iqPqR5r15TfSr1YPYeEI19YeaXMCbY6u88Y76GLQ==, + } + engines: { node: ^10 || ^12 || >=14 } posthog-js@1.184.1: - resolution: {integrity: sha512-q/1Kdard5SZnL2smrzeKcD+RuUi2PnbidiN4D3ThK20bNrhy5Z2heIy9SnRMvEiARY5lcQ7zxmDCAKPBKGSOtQ==} + resolution: + { + integrity: sha512-q/1Kdard5SZnL2smrzeKcD+RuUi2PnbidiN4D3ThK20bNrhy5Z2heIy9SnRMvEiARY5lcQ7zxmDCAKPBKGSOtQ==, + } posthog-node@4.2.1: - resolution: {integrity: sha512-l+fsjYEkTik3m/G0pE7gMr4qBJP84LhK779oQm6MBzhBGpd4By4qieTW+4FUAlNCyzQTynn3Nhsa50c0IELSxQ==} - engines: {node: '>=15.0.0'} + resolution: + { + integrity: sha512-l+fsjYEkTik3m/G0pE7gMr4qBJP84LhK779oQm6MBzhBGpd4By4qieTW+4FUAlNCyzQTynn3Nhsa50c0IELSxQ==, + } + engines: { node: ">=15.0.0" } preact@10.24.3: - resolution: {integrity: sha512-Z2dPnBnMUfyQfSQ+GBdsGa16hz35YmLmtTLhM169uW944hYL6xzTYkJjC07j+Wosz733pMWx0fgON3JNw1jJQA==} + resolution: + { + integrity: sha512-Z2dPnBnMUfyQfSQ+GBdsGa16hz35YmLmtTLhM169uW944hYL6xzTYkJjC07j+Wosz733pMWx0fgON3JNw1jJQA==, + } prettier@3.3.3: - resolution: {integrity: sha512-i2tDNA0O5IrMO757lfrdQZCc2jPNDVntV0m/+4whiDfWaTKfMNgR7Qz0NAeGz/nRqF4m5/6CLzbP4/liHt12Ew==} - engines: {node: '>=14'} + resolution: + { + integrity: sha512-i2tDNA0O5IrMO757lfrdQZCc2jPNDVntV0m/+4whiDfWaTKfMNgR7Qz0NAeGz/nRqF4m5/6CLzbP4/liHt12Ew==, + } + engines: { node: ">=14" } hasBin: true pretty-format@29.7.0: - resolution: {integrity: sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ==} - engines: {node: ^14.15.0 || ^16.10.0 || >=18.0.0} + resolution: + { + integrity: sha512-Pdlw/oPxN+aXdmM9R00JVC9WVFoCLTKJvDVLgmJ+qAffBMxsV85l/Lu7sNx4zSzPyoL2euImuEwHhOXdEgNFZQ==, + } + engines: { node: ^14.15.0 || ^16.10.0 || >=18.0.0 } prompts@2.4.2: - resolution: {integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==} - engines: {node: '>= 6'} + resolution: + { + integrity: sha512-NxNv/kLguCA7p3jE8oL2aEBsrJWgAakBpgmgK6lpPWV+WuOmY6r2/zbAVnP+T8bQlA0nzHXSJSJW0Hq7ylaD2Q==, + } + engines: { node: ">= 6" } proxy-from-env@1.1.0: - resolution: {integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==} + resolution: + { + integrity: sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==, + } pure-rand@6.1.0: - resolution: {integrity: sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==} + resolution: + { + integrity: sha512-bVWawvoZoBYpp6yIoQtQXHZjmz35RSVHnUOTefl8Vcjr8snTPY1wnpSPMWekcFwbxI6gtmT7rSYPFvz71ldiOA==, + } react-is@18.3.1: - resolution: {integrity: 
sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==} + resolution: + { + integrity: sha512-/LLMVyas0ljjAtoYiPqYiL8VWXzUUdThrmU5+n20DZv+a+ClRoevUzw5JxU+Ieh5/c87ytoTBV9G1FiKfNJdmg==, + } require-directory@2.1.1: - resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==, + } + engines: { node: ">=0.10.0" } resolve-cwd@3.0.0: - resolution: {integrity: sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-OrZaX2Mb+rJCpH/6CpSqt9xFVpN++x01XnN2ie9g6P5/3xelLAkXWVADpdz1IHD/KFfEXyE6V0U01OQ3UO2rEg==, + } + engines: { node: ">=8" } resolve-from@5.0.0: - resolution: {integrity: sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==, + } + engines: { node: ">=8" } resolve.exports@2.0.2: - resolution: {integrity: sha512-X2UW6Nw3n/aMgDVy+0rSqgHlv39WZAlZrXCdnbyEiKm17DSqHX4MmQMaST3FbeWR5FTuRcUwYAziZajji0Y7mg==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-X2UW6Nw3n/aMgDVy+0rSqgHlv39WZAlZrXCdnbyEiKm17DSqHX4MmQMaST3FbeWR5FTuRcUwYAziZajji0Y7mg==, + } + engines: { node: ">=10" } resolve@1.22.8: - resolution: {integrity: sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==} + resolution: + { + integrity: sha512-oKWePCxqpd6FlLvGV1VU0x7bkPmmCNolxzjMf4NczoDnQcIWrAF+cPtZn5i6n+RfD2d9i0tzpKnG6Yk168yIyw==, + } hasBin: true rrdom@2.0.0-alpha.17: - resolution: {integrity: sha512-b6caDiNcFO96Opp7TGdcVd4OLGSXu5dJe+A0IDiAu8mk7OmhqZCSDlgQdTKmdO5wMf4zPsUTgb8H/aNvR3kDHA==} + resolution: + { + integrity: sha512-b6caDiNcFO96Opp7TGdcVd4OLGSXu5dJe+A0IDiAu8mk7OmhqZCSDlgQdTKmdO5wMf4zPsUTgb8H/aNvR3kDHA==, + } rrweb-snapshot@2.0.0-alpha.17: - resolution: {integrity: sha512-GBg5pV8LHOTbeVmH2VHLEFR0mc2QpQMzAvcoxEGfPNWgWHc8UvKCyq7pqN1vA+fDZ+yXXbixeO0kB2pzVvFCBw==} + resolution: + { + integrity: sha512-GBg5pV8LHOTbeVmH2VHLEFR0mc2QpQMzAvcoxEGfPNWgWHc8UvKCyq7pqN1vA+fDZ+yXXbixeO0kB2pzVvFCBw==, + } rrweb-snapshot@2.0.0-alpha.4: - resolution: {integrity: sha512-KQ2OtPpXO5jLYqg1OnXS/Hf+EzqnZyP5A+XPqBCjYpj3XIje/Od4gdUwjbFo3cVuWq5Cw5Y1d3/xwgIS7/XpQQ==} + resolution: + { + integrity: sha512-KQ2OtPpXO5jLYqg1OnXS/Hf+EzqnZyP5A+XPqBCjYpj3XIje/Od4gdUwjbFo3cVuWq5Cw5Y1d3/xwgIS7/XpQQ==, + } rrweb@2.0.0-alpha.17: - resolution: {integrity: sha512-GQxBkCC4r9XL2bwSdv7iIS49M3cEA8OtObVq0rrQ4GUT4+h7omucGQ4x7m5YN5Vq1oalStBaBlYqF7yRnfG3JA==} + resolution: + { + integrity: sha512-GQxBkCC4r9XL2bwSdv7iIS49M3cEA8OtObVq0rrQ4GUT4+h7omucGQ4x7m5YN5Vq1oalStBaBlYqF7yRnfG3JA==, + } rusha@0.8.14: - resolution: {integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==} + resolution: + { + integrity: sha512-cLgakCUf6PedEu15t8kbsjnwIFFR2D4RfL+W3iWFJ4iac7z4B0ZI8fxy4R3J956kAI68HclCFGL8MPoUVC3qVA==, + } semver@6.3.1: - resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==} + resolution: + { + integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==, + } hasBin: true semver@7.6.3: - resolution: 
{integrity: sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==, + } + engines: { node: ">=10" } hasBin: true shebang-command@2.0.0: - resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==, + } + engines: { node: ">=8" } shebang-regex@3.0.0: - resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==, + } + engines: { node: ">=8" } signal-exit@3.0.7: - resolution: {integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==} + resolution: + { + integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==, + } sisteransi@1.0.5: - resolution: {integrity: sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==} + resolution: + { + integrity: sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==, + } slash@3.0.0: - resolution: {integrity: sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==, + } + engines: { node: ">=8" } source-map-js@1.2.1: - resolution: {integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==, + } + engines: { node: ">=0.10.0" } source-map-support@0.5.13: - resolution: {integrity: sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==} + resolution: + { + integrity: sha512-SHSKFHadjVA5oR4PPqhtAVdcBWwRYVd6g6cAXnIbRiIwc2EhPrTuKUBdSLvlEKyIP3GCf89fltvcZiP9MMFA1w==, + } source-map@0.6.1: - resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==} - engines: {node: '>=0.10.0'} + resolution: + { + integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==, + } + engines: { node: ">=0.10.0" } sprintf-js@1.0.3: - resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==} + resolution: + { + integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==, + } stack-utils@2.0.6: - resolution: {integrity: sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-XlkWvfIm6RmsWtNJx+uqtKLS8eqFbxUg0ZzLXqY0caEy9l7hruX8IpiDnjsLavoBgqCCR71TqWO8MaXYheJ3RQ==, + } + engines: { node: ">=10" } string-length@4.0.2: - resolution: {integrity: sha512-+l6rNN5fYHNhZZy41RXsYptCjA2Igmq4EG7kZAYFQI1E1VTXarr6ZPXBg6eq7Y6eK4FEhY6AJlyuFIb/v/S0VQ==} - engines: {node: 
'>=10'} + resolution: + { + integrity: sha512-+l6rNN5fYHNhZZy41RXsYptCjA2Igmq4EG7kZAYFQI1E1VTXarr6ZPXBg6eq7Y6eK4FEhY6AJlyuFIb/v/S0VQ==, + } + engines: { node: ">=10" } string-width@4.2.3: - resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==, + } + engines: { node: ">=8" } strip-ansi@6.0.1: - resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==, + } + engines: { node: ">=8" } strip-bom@4.0.0: - resolution: {integrity: sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-3xurFv5tEgii33Zi8Jtp55wEIILR9eh34FAW00PZf+JnSsTmV/ioewSgQl97JHvgjoRGwPShsWm+IdrxB35d0w==, + } + engines: { node: ">=8" } strip-final-newline@2.0.0: - resolution: {integrity: sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==, + } + engines: { node: ">=6" } strip-json-comments@3.1.1: - resolution: {integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-6fPc+R4ihwqP6N/aIv2f1gMH8lOVtWQHoqC4yK6oSDVVocumAsfCqjkXnqiYMhmMwS/mEHLp7Vehlt3ql6lEig==, + } + engines: { node: ">=8" } supports-color@5.5.0: - resolution: {integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==, + } + engines: { node: ">=4" } supports-color@7.2.0: - resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==, + } + engines: { node: ">=8" } supports-color@8.1.1: - resolution: {integrity: sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q==, + } + engines: { node: ">=10" } supports-preserve-symlinks-flag@1.0.0: - resolution: {integrity: sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==} - engines: {node: '>= 0.4'} + resolution: + { + integrity: sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==, + } + engines: { node: ">= 0.4" } test-exclude@6.0.0: - resolution: {integrity: sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==} - engines: {node: '>=8'} + resolution: + { + integrity: sha512-cAGWPIyOHU6zlmg88jwm7VRyXnMN7iV68OGAbYDk/Mh/xC/pzVPlQtY6ngoIH/5/tciuhGfvESU8GrHrcxD56w==, + } + engines: { node: ">=8" } tmpl@1.0.5: - resolution: {integrity: 
sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==} + resolution: + { + integrity: sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==, + } to-regex-range@5.0.1: - resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==} - engines: {node: '>=8.0'} + resolution: + { + integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==, + } + engines: { node: ">=8.0" } ts-jest@29.2.5: - resolution: {integrity: sha512-KD8zB2aAZrcKIdGk4OwpJggeLcH1FgrICqDSROWqlnJXGCXK4Mn6FcdK2B6670Xr73lHMG1kHw8R87A0ecZ+vA==} - engines: {node: ^14.15.0 || ^16.10.0 || ^18.0.0 || >=20.0.0} + resolution: + { + integrity: sha512-KD8zB2aAZrcKIdGk4OwpJggeLcH1FgrICqDSROWqlnJXGCXK4Mn6FcdK2B6670Xr73lHMG1kHw8R87A0ecZ+vA==, + } + engines: { node: ^14.15.0 || ^16.10.0 || ^18.0.0 || >=20.0.0 } hasBin: true peerDependencies: - '@babel/core': '>=7.0.0-beta.0 <8' - '@jest/transform': ^29.0.0 - '@jest/types': ^29.0.0 + "@babel/core": ">=7.0.0-beta.0 <8" + "@jest/transform": ^29.0.0 + "@jest/types": ^29.0.0 babel-jest: ^29.0.0 - esbuild: '*' + esbuild: "*" jest: ^29.0.0 - typescript: '>=4.3 <6' + typescript: ">=4.3 <6" peerDependenciesMeta: - '@babel/core': + "@babel/core": optional: true - '@jest/transform': + "@jest/transform": optional: true - '@jest/types': + "@jest/types": optional: true babel-jest: optional: true @@ -1329,123 +2263,185 @@ packages: optional: true ts-node@10.9.2: - resolution: {integrity: sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==} + resolution: + { + integrity: sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==, + } hasBin: true peerDependencies: - '@swc/core': '>=1.2.50' - '@swc/wasm': '>=1.2.50' - '@types/node': '*' - typescript: '>=2.7' + "@swc/core": ">=1.2.50" + "@swc/wasm": ">=1.2.50" + "@types/node": "*" + typescript: ">=2.7" peerDependenciesMeta: - '@swc/core': + "@swc/core": optional: true - '@swc/wasm': + "@swc/wasm": optional: true type-detect@4.0.8: - resolution: {integrity: sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==} - engines: {node: '>=4'} + resolution: + { + integrity: sha512-0fr/mIH1dlO+x7TlcMy+bIDqKPsw/70tVyeHW787goQjhmqaZe10uwLujubK9q9Lg6Fiho1KUKDYz0Z7k7g5/g==, + } + engines: { node: ">=4" } type-fest@0.21.3: - resolution: {integrity: sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w==, + } + engines: { node: ">=10" } typescript@5.6.3: - resolution: {integrity: sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==} - engines: {node: '>=14.17'} + resolution: + { + integrity: sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==, + } + engines: { node: ">=14.17" } hasBin: true undici-types@6.19.8: - resolution: {integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==} + resolution: + { + integrity: sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==, + } update-browserslist-db@1.1.1: - resolution: {integrity: 
sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A==} + resolution: + { + integrity: sha512-R8UzCaa9Az+38REPiJ1tXlImTJXlVfgHZsglwBD/k6nj76ctsH1E3q4doGrukiLQd3sGQYu56r5+lo5r94l29A==, + } hasBin: true peerDependencies: - browserslist: '>= 4.21.0' + browserslist: ">= 4.21.0" uuid@10.0.0: - resolution: {integrity: sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==} + resolution: + { + integrity: sha512-8XkAphELsDnEGrDxUOHB3RGvXz6TeuYSGEZBOjtTtPm2lwhGBjLgOzLHB63IUWfBpNucQjND6d3AOudO+H3RWQ==, + } hasBin: true v8-compile-cache-lib@3.0.1: - resolution: {integrity: sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==} + resolution: + { + integrity: sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==, + } v8-to-istanbul@9.3.0: - resolution: {integrity: sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==} - engines: {node: '>=10.12.0'} + resolution: + { + integrity: sha512-kiGUalWN+rgBJ/1OHZsBtU4rXZOfj/7rKQxULKlIzwzQSvMJUUNgPwJEEh7gU6xEVxC0ahoOBvN2YI8GH6FNgA==, + } + engines: { node: ">=10.12.0" } walker@1.0.8: - resolution: {integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==} + resolution: + { + integrity: sha512-ts/8E8l5b7kY0vlWLewOkDXMmPdLcVV4GmOQLyxuSswIJsweeFZtAsMF7k1Nszz+TYBQrlYRmzOnr398y1JemQ==, + } web-vitals@4.2.3: - resolution: {integrity: sha512-/CFAm1mNxSmOj6i0Co+iGFJ58OS4NRGVP+AWS/l509uIK5a1bSoIVaHz/ZumpHTfHSZBpgrJ+wjfpAOrTHok5Q==} + resolution: + { + integrity: sha512-/CFAm1mNxSmOj6i0Co+iGFJ58OS4NRGVP+AWS/l509uIK5a1bSoIVaHz/ZumpHTfHSZBpgrJ+wjfpAOrTHok5Q==, + } which@2.0.2: - resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} - engines: {node: '>= 8'} + resolution: + { + integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==, + } + engines: { node: ">= 8" } hasBin: true wrap-ansi@7.0.0: - resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==, + } + engines: { node: ">=10" } wrappy@1.0.2: - resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} + resolution: + { + integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==, + } write-file-atomic@4.0.2: - resolution: {integrity: sha512-7KxauUdBmSdWnmpaGFg+ppNjKF8uNLry8LyzjauQDOVONfFLNKrKvQOxZ/VuTIcS/gge/YNahf5RIIQWTSarlg==} - engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} + resolution: + { + integrity: sha512-7KxauUdBmSdWnmpaGFg+ppNjKF8uNLry8LyzjauQDOVONfFLNKrKvQOxZ/VuTIcS/gge/YNahf5RIIQWTSarlg==, + } + engines: { node: ^12.13.0 || ^14.15.0 || >=16.0.0 } y18n@5.0.8: - resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==, + } + engines: { node: ">=10" } yallist@3.1.1: - resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==} + resolution: + 
{ + integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==, + } yargs-parser@21.1.1: - resolution: {integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==} - engines: {node: '>=12'} + resolution: + { + integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==, + } + engines: { node: ">=12" } yargs@17.7.2: - resolution: {integrity: sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==} - engines: {node: '>=12'} + resolution: + { + integrity: sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==, + } + engines: { node: ">=12" } yn@3.1.1: - resolution: {integrity: sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==} - engines: {node: '>=6'} + resolution: + { + integrity: sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==, + } + engines: { node: ">=6" } yocto-queue@0.1.0: - resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==} - engines: {node: '>=10'} + resolution: + { + integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==, + } + engines: { node: ">=10" } snapshots: - - '@ampproject/remapping@2.3.0': + "@ampproject/remapping@2.3.0": dependencies: - '@jridgewell/gen-mapping': 0.3.5 - '@jridgewell/trace-mapping': 0.3.25 + "@jridgewell/gen-mapping": 0.3.5 + "@jridgewell/trace-mapping": 0.3.25 - '@babel/code-frame@7.25.9': + "@babel/code-frame@7.25.9": dependencies: - '@babel/highlight': 7.25.9 + "@babel/highlight": 7.25.9 picocolors: 1.1.1 - '@babel/compat-data@7.25.9': {} + "@babel/compat-data@7.25.9": {} - '@babel/core@7.25.9': + "@babel/core@7.25.9": dependencies: - '@ampproject/remapping': 2.3.0 - '@babel/code-frame': 7.25.9 - '@babel/generator': 7.25.9 - '@babel/helper-compilation-targets': 7.25.9 - '@babel/helper-module-transforms': 7.25.9(@babel/core@7.25.9) - '@babel/helpers': 7.25.9 - '@babel/parser': 7.25.9 - '@babel/template': 7.25.9 - '@babel/traverse': 7.25.9 - '@babel/types': 7.25.9 + "@ampproject/remapping": 2.3.0 + "@babel/code-frame": 7.25.9 + "@babel/generator": 7.25.9 + "@babel/helper-compilation-targets": 7.25.9 + "@babel/helper-module-transforms": 7.25.9(@babel/core@7.25.9) + "@babel/helpers": 7.25.9 + "@babel/parser": 7.25.9 + "@babel/template": 7.25.9 + "@babel/traverse": 7.25.9 + "@babel/types": 7.25.9 convert-source-map: 2.0.0 debug: 4.3.7 gensync: 1.0.0-beta.2 @@ -1454,184 +2450,184 @@ snapshots: transitivePeerDependencies: - supports-color - '@babel/generator@7.25.9': + "@babel/generator@7.25.9": dependencies: - '@babel/types': 7.25.9 - '@jridgewell/gen-mapping': 0.3.5 - '@jridgewell/trace-mapping': 0.3.25 + "@babel/types": 7.25.9 + "@jridgewell/gen-mapping": 0.3.5 + "@jridgewell/trace-mapping": 0.3.25 jsesc: 3.0.2 - '@babel/helper-compilation-targets@7.25.9': + "@babel/helper-compilation-targets@7.25.9": dependencies: - '@babel/compat-data': 7.25.9 - '@babel/helper-validator-option': 7.25.9 + "@babel/compat-data": 7.25.9 + "@babel/helper-validator-option": 7.25.9 browserslist: 4.24.2 lru-cache: 5.1.1 semver: 6.3.1 - '@babel/helper-module-imports@7.25.9': + "@babel/helper-module-imports@7.25.9": dependencies: - '@babel/traverse': 7.25.9 - '@babel/types': 7.25.9 + "@babel/traverse": 7.25.9 + "@babel/types": 7.25.9 
transitivePeerDependencies: - supports-color - '@babel/helper-module-transforms@7.25.9(@babel/core@7.25.9)': + "@babel/helper-module-transforms@7.25.9(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-module-imports': 7.25.9 - '@babel/helper-simple-access': 7.25.9 - '@babel/helper-validator-identifier': 7.25.9 - '@babel/traverse': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-module-imports": 7.25.9 + "@babel/helper-simple-access": 7.25.9 + "@babel/helper-validator-identifier": 7.25.9 + "@babel/traverse": 7.25.9 transitivePeerDependencies: - supports-color - '@babel/helper-plugin-utils@7.25.9': {} + "@babel/helper-plugin-utils@7.25.9": {} - '@babel/helper-simple-access@7.25.9': + "@babel/helper-simple-access@7.25.9": dependencies: - '@babel/traverse': 7.25.9 - '@babel/types': 7.25.9 + "@babel/traverse": 7.25.9 + "@babel/types": 7.25.9 transitivePeerDependencies: - supports-color - '@babel/helper-string-parser@7.25.9': {} + "@babel/helper-string-parser@7.25.9": {} - '@babel/helper-validator-identifier@7.25.9': {} + "@babel/helper-validator-identifier@7.25.9": {} - '@babel/helper-validator-option@7.25.9': {} + "@babel/helper-validator-option@7.25.9": {} - '@babel/helpers@7.25.9': + "@babel/helpers@7.25.9": dependencies: - '@babel/template': 7.25.9 - '@babel/types': 7.25.9 + "@babel/template": 7.25.9 + "@babel/types": 7.25.9 - '@babel/highlight@7.25.9': + "@babel/highlight@7.25.9": dependencies: - '@babel/helper-validator-identifier': 7.25.9 + "@babel/helper-validator-identifier": 7.25.9 chalk: 2.4.2 js-tokens: 4.0.0 picocolors: 1.1.1 - '@babel/parser@7.25.9': + "@babel/parser@7.25.9": dependencies: - '@babel/types': 7.25.9 + "@babel/types": 7.25.9 - '@babel/plugin-syntax-async-generators@7.8.4(@babel/core@7.25.9)': + "@babel/plugin-syntax-async-generators@7.8.4(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-bigint@7.8.3(@babel/core@7.25.9)': + "@babel/plugin-syntax-bigint@7.8.3(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-class-properties@7.12.13(@babel/core@7.25.9)': + "@babel/plugin-syntax-class-properties@7.12.13(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-class-static-block@7.14.5(@babel/core@7.25.9)': + "@babel/plugin-syntax-class-static-block@7.14.5(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-import-attributes@7.25.9(@babel/core@7.25.9)': + "@babel/plugin-syntax-import-attributes@7.25.9(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-import-meta@7.10.4(@babel/core@7.25.9)': + "@babel/plugin-syntax-import-meta@7.10.4(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-json-strings@7.8.3(@babel/core@7.25.9)': + "@babel/plugin-syntax-json-strings@7.8.3(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - 
'@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-jsx@7.25.9(@babel/core@7.25.9)': + "@babel/plugin-syntax-jsx@7.25.9(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-logical-assignment-operators@7.10.4(@babel/core@7.25.9)': + "@babel/plugin-syntax-logical-assignment-operators@7.10.4(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-nullish-coalescing-operator@7.8.3(@babel/core@7.25.9)': + "@babel/plugin-syntax-nullish-coalescing-operator@7.8.3(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-numeric-separator@7.10.4(@babel/core@7.25.9)': + "@babel/plugin-syntax-numeric-separator@7.10.4(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-object-rest-spread@7.8.3(@babel/core@7.25.9)': + "@babel/plugin-syntax-object-rest-spread@7.8.3(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-optional-catch-binding@7.8.3(@babel/core@7.25.9)': + "@babel/plugin-syntax-optional-catch-binding@7.8.3(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-optional-chaining@7.8.3(@babel/core@7.25.9)': + "@babel/plugin-syntax-optional-chaining@7.8.3(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-private-property-in-object@7.14.5(@babel/core@7.25.9)': + "@babel/plugin-syntax-private-property-in-object@7.14.5(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-top-level-await@7.14.5(@babel/core@7.25.9)': + "@babel/plugin-syntax-top-level-await@7.14.5(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/plugin-syntax-typescript@7.25.9(@babel/core@7.25.9)': + "@babel/plugin-syntax-typescript@7.25.9(@babel/core@7.25.9)": dependencies: - '@babel/core': 7.25.9 - '@babel/helper-plugin-utils': 7.25.9 + "@babel/core": 7.25.9 + "@babel/helper-plugin-utils": 7.25.9 - '@babel/template@7.25.9': + "@babel/template@7.25.9": dependencies: - '@babel/code-frame': 7.25.9 - '@babel/parser': 7.25.9 - '@babel/types': 7.25.9 + "@babel/code-frame": 7.25.9 + "@babel/parser": 7.25.9 + "@babel/types": 7.25.9 - '@babel/traverse@7.25.9': + "@babel/traverse@7.25.9": dependencies: - '@babel/code-frame': 7.25.9 - '@babel/generator': 7.25.9 - '@babel/parser': 7.25.9 - '@babel/template': 7.25.9 - '@babel/types': 7.25.9 + "@babel/code-frame": 7.25.9 + "@babel/generator": 7.25.9 + "@babel/parser": 7.25.9 + "@babel/template": 7.25.9 + "@babel/types": 7.25.9 debug: 4.3.7 globals: 11.12.0 
transitivePeerDependencies: - supports-color - '@babel/types@7.25.9': + "@babel/types@7.25.9": dependencies: - '@babel/helper-string-parser': 7.25.9 - '@babel/helper-validator-identifier': 7.25.9 + "@babel/helper-string-parser": 7.25.9 + "@babel/helper-validator-identifier": 7.25.9 - '@bcoe/v8-coverage@0.2.3': {} + "@bcoe/v8-coverage@0.2.3": {} - '@cspotcode/source-map-support@0.8.1': + "@cspotcode/source-map-support@0.8.1": dependencies: - '@jridgewell/trace-mapping': 0.3.9 + "@jridgewell/trace-mapping": 0.3.9 - '@istanbuljs/load-nyc-config@1.1.0': + "@istanbuljs/load-nyc-config@1.1.0": dependencies: camelcase: 5.3.1 find-up: 4.1.0 @@ -1639,25 +2635,25 @@ snapshots: js-yaml: 3.14.1 resolve-from: 5.0.0 - '@istanbuljs/schema@0.1.3': {} + "@istanbuljs/schema@0.1.3": {} - '@jest/console@29.7.0': + "@jest/console@29.7.0": dependencies: - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 chalk: 4.1.2 jest-message-util: 29.7.0 jest-util: 29.7.0 slash: 3.0.0 - '@jest/core@29.7.0(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3))': + "@jest/core@29.7.0(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3))": dependencies: - '@jest/console': 29.7.0 - '@jest/reporters': 29.7.0 - '@jest/test-result': 29.7.0 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/console": 29.7.0 + "@jest/reporters": 29.7.0 + "@jest/test-result": 29.7.0 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 ansi-escapes: 4.3.2 chalk: 4.1.2 ci-info: 3.9.0 @@ -1685,51 +2681,51 @@ snapshots: - supports-color - ts-node - '@jest/environment@29.7.0': + "@jest/environment@29.7.0": dependencies: - '@jest/fake-timers': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/fake-timers": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 jest-mock: 29.7.0 - '@jest/expect-utils@29.7.0': + "@jest/expect-utils@29.7.0": dependencies: jest-get-type: 29.6.3 - '@jest/expect@29.7.0': + "@jest/expect@29.7.0": dependencies: expect: 29.7.0 jest-snapshot: 29.7.0 transitivePeerDependencies: - supports-color - '@jest/fake-timers@29.7.0': + "@jest/fake-timers@29.7.0": dependencies: - '@jest/types': 29.6.3 - '@sinonjs/fake-timers': 10.3.0 - '@types/node': 20.16.14 + "@jest/types": 29.6.3 + "@sinonjs/fake-timers": 10.3.0 + "@types/node": 20.16.14 jest-message-util: 29.7.0 jest-mock: 29.7.0 jest-util: 29.7.0 - '@jest/globals@29.7.0': + "@jest/globals@29.7.0": dependencies: - '@jest/environment': 29.7.0 - '@jest/expect': 29.7.0 - '@jest/types': 29.6.3 + "@jest/environment": 29.7.0 + "@jest/expect": 29.7.0 + "@jest/types": 29.6.3 jest-mock: 29.7.0 transitivePeerDependencies: - supports-color - '@jest/reporters@29.7.0': + "@jest/reporters@29.7.0": dependencies: - '@bcoe/v8-coverage': 0.2.3 - '@jest/console': 29.7.0 - '@jest/test-result': 29.7.0 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 - '@jridgewell/trace-mapping': 0.3.25 - '@types/node': 20.16.14 + "@bcoe/v8-coverage": 0.2.3 + "@jest/console": 29.7.0 + "@jest/test-result": 29.7.0 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 + "@jridgewell/trace-mapping": 0.3.25 + "@types/node": 20.16.14 chalk: 4.1.2 collect-v8-coverage: 1.0.2 exit: 0.1.2 @@ -1750,35 +2746,35 @@ snapshots: transitivePeerDependencies: - supports-color - '@jest/schemas@29.6.3': + "@jest/schemas@29.6.3": dependencies: - '@sinclair/typebox': 0.27.8 + "@sinclair/typebox": 0.27.8 - '@jest/source-map@29.6.3': + "@jest/source-map@29.6.3": dependencies: - '@jridgewell/trace-mapping': 0.3.25 + 
"@jridgewell/trace-mapping": 0.3.25 callsites: 3.1.0 graceful-fs: 4.2.11 - '@jest/test-result@29.7.0': + "@jest/test-result@29.7.0": dependencies: - '@jest/console': 29.7.0 - '@jest/types': 29.6.3 - '@types/istanbul-lib-coverage': 2.0.6 + "@jest/console": 29.7.0 + "@jest/types": 29.6.3 + "@types/istanbul-lib-coverage": 2.0.6 collect-v8-coverage: 1.0.2 - '@jest/test-sequencer@29.7.0': + "@jest/test-sequencer@29.7.0": dependencies: - '@jest/test-result': 29.7.0 + "@jest/test-result": 29.7.0 graceful-fs: 4.2.11 jest-haste-map: 29.7.0 slash: 3.0.0 - '@jest/transform@29.7.0': + "@jest/transform@29.7.0": dependencies: - '@babel/core': 7.25.9 - '@jest/types': 29.6.3 - '@jridgewell/trace-mapping': 0.3.25 + "@babel/core": 7.25.9 + "@jest/types": 29.6.3 + "@jridgewell/trace-mapping": 0.3.25 babel-plugin-istanbul: 6.1.1 chalk: 4.1.2 convert-source-map: 2.0.0 @@ -1794,123 +2790,123 @@ snapshots: transitivePeerDependencies: - supports-color - '@jest/types@29.6.3': + "@jest/types@29.6.3": dependencies: - '@jest/schemas': 29.6.3 - '@types/istanbul-lib-coverage': 2.0.6 - '@types/istanbul-reports': 3.0.4 - '@types/node': 20.16.14 - '@types/yargs': 17.0.33 + "@jest/schemas": 29.6.3 + "@types/istanbul-lib-coverage": 2.0.6 + "@types/istanbul-reports": 3.0.4 + "@types/node": 20.16.14 + "@types/yargs": 17.0.33 chalk: 4.1.2 - '@jridgewell/gen-mapping@0.3.5': + "@jridgewell/gen-mapping@0.3.5": dependencies: - '@jridgewell/set-array': 1.2.1 - '@jridgewell/sourcemap-codec': 1.5.0 - '@jridgewell/trace-mapping': 0.3.25 + "@jridgewell/set-array": 1.2.1 + "@jridgewell/sourcemap-codec": 1.5.0 + "@jridgewell/trace-mapping": 0.3.25 - '@jridgewell/resolve-uri@3.1.2': {} + "@jridgewell/resolve-uri@3.1.2": {} - '@jridgewell/set-array@1.2.1': {} + "@jridgewell/set-array@1.2.1": {} - '@jridgewell/sourcemap-codec@1.5.0': {} + "@jridgewell/sourcemap-codec@1.5.0": {} - '@jridgewell/trace-mapping@0.3.25': + "@jridgewell/trace-mapping@0.3.25": dependencies: - '@jridgewell/resolve-uri': 3.1.2 - '@jridgewell/sourcemap-codec': 1.5.0 + "@jridgewell/resolve-uri": 3.1.2 + "@jridgewell/sourcemap-codec": 1.5.0 - '@jridgewell/trace-mapping@0.3.9': + "@jridgewell/trace-mapping@0.3.9": dependencies: - '@jridgewell/resolve-uri': 3.1.2 - '@jridgewell/sourcemap-codec': 1.5.0 + "@jridgewell/resolve-uri": 3.1.2 + "@jridgewell/sourcemap-codec": 1.5.0 - '@rrweb/record@2.0.0-alpha.17': + "@rrweb/record@2.0.0-alpha.17": dependencies: - '@rrweb/types': 2.0.0-alpha.17 + "@rrweb/types": 2.0.0-alpha.17 rrweb: 2.0.0-alpha.17 - '@rrweb/types@2.0.0-alpha.17': + "@rrweb/types@2.0.0-alpha.17": dependencies: rrweb-snapshot: 2.0.0-alpha.17 - '@rrweb/utils@2.0.0-alpha.17': {} + "@rrweb/utils@2.0.0-alpha.17": {} - '@sinclair/typebox@0.27.8': {} + "@sinclair/typebox@0.27.8": {} - '@sinonjs/commons@3.0.1': + "@sinonjs/commons@3.0.1": dependencies: type-detect: 4.0.8 - '@sinonjs/fake-timers@10.3.0': + "@sinonjs/fake-timers@10.3.0": dependencies: - '@sinonjs/commons': 3.0.1 + "@sinonjs/commons": 3.0.1 - '@tsconfig/node10@1.0.11': {} + "@tsconfig/node10@1.0.11": {} - '@tsconfig/node12@1.0.11': {} + "@tsconfig/node12@1.0.11": {} - '@tsconfig/node14@1.0.3': {} + "@tsconfig/node14@1.0.3": {} - '@tsconfig/node16@1.0.4': {} + "@tsconfig/node16@1.0.4": {} - '@types/babel__core@7.20.5': + "@types/babel__core@7.20.5": dependencies: - '@babel/parser': 7.25.9 - '@babel/types': 7.25.9 - '@types/babel__generator': 7.6.8 - '@types/babel__template': 7.4.4 - '@types/babel__traverse': 7.20.6 + "@babel/parser": 7.25.9 + "@babel/types": 7.25.9 + "@types/babel__generator": 7.6.8 + 
"@types/babel__template": 7.4.4 + "@types/babel__traverse": 7.20.6 - '@types/babel__generator@7.6.8': + "@types/babel__generator@7.6.8": dependencies: - '@babel/types': 7.25.9 + "@babel/types": 7.25.9 - '@types/babel__template@7.4.4': + "@types/babel__template@7.4.4": dependencies: - '@babel/parser': 7.25.9 - '@babel/types': 7.25.9 + "@babel/parser": 7.25.9 + "@babel/types": 7.25.9 - '@types/babel__traverse@7.20.6': + "@types/babel__traverse@7.20.6": dependencies: - '@babel/types': 7.25.9 + "@babel/types": 7.25.9 - '@types/css-font-loading-module@0.0.7': {} + "@types/css-font-loading-module@0.0.7": {} - '@types/graceful-fs@4.1.9': + "@types/graceful-fs@4.1.9": dependencies: - '@types/node': 20.16.14 + "@types/node": 20.16.14 - '@types/istanbul-lib-coverage@2.0.6': {} + "@types/istanbul-lib-coverage@2.0.6": {} - '@types/istanbul-lib-report@3.0.3': + "@types/istanbul-lib-report@3.0.3": dependencies: - '@types/istanbul-lib-coverage': 2.0.6 + "@types/istanbul-lib-coverage": 2.0.6 - '@types/istanbul-reports@3.0.4': + "@types/istanbul-reports@3.0.4": dependencies: - '@types/istanbul-lib-report': 3.0.3 + "@types/istanbul-lib-report": 3.0.3 - '@types/jest@29.5.13': + "@types/jest@29.5.13": dependencies: expect: 29.7.0 pretty-format: 29.7.0 - '@types/node@20.16.14': + "@types/node@20.16.14": dependencies: undici-types: 6.19.8 - '@types/stack-utils@2.0.3': {} + "@types/stack-utils@2.0.3": {} - '@types/uuid@10.0.0': {} + "@types/uuid@10.0.0": {} - '@types/yargs-parser@21.0.3': {} + "@types/yargs-parser@21.0.3": {} - '@types/yargs@17.0.33': + "@types/yargs@17.0.33": dependencies: - '@types/yargs-parser': 21.0.3 + "@types/yargs-parser": 21.0.3 - '@xstate/fsm@1.6.5': {} + "@xstate/fsm@1.6.5": {} acorn-walk@8.3.4: dependencies: @@ -1959,9 +2955,9 @@ snapshots: babel-jest@29.7.0(@babel/core@7.25.9): dependencies: - '@babel/core': 7.25.9 - '@jest/transform': 29.7.0 - '@types/babel__core': 7.20.5 + "@babel/core": 7.25.9 + "@jest/transform": 29.7.0 + "@types/babel__core": 7.20.5 babel-plugin-istanbul: 6.1.1 babel-preset-jest: 29.6.3(@babel/core@7.25.9) chalk: 4.1.2 @@ -1972,9 +2968,9 @@ snapshots: babel-plugin-istanbul@6.1.1: dependencies: - '@babel/helper-plugin-utils': 7.25.9 - '@istanbuljs/load-nyc-config': 1.1.0 - '@istanbuljs/schema': 0.1.3 + "@babel/helper-plugin-utils": 7.25.9 + "@istanbuljs/load-nyc-config": 1.1.0 + "@istanbuljs/schema": 0.1.3 istanbul-lib-instrument: 5.2.1 test-exclude: 6.0.0 transitivePeerDependencies: @@ -1982,33 +2978,33 @@ snapshots: babel-plugin-jest-hoist@29.6.3: dependencies: - '@babel/template': 7.25.9 - '@babel/types': 7.25.9 - '@types/babel__core': 7.20.5 - '@types/babel__traverse': 7.20.6 + "@babel/template": 7.25.9 + "@babel/types": 7.25.9 + "@types/babel__core": 7.20.5 + "@types/babel__traverse": 7.20.6 babel-preset-current-node-syntax@1.1.0(@babel/core@7.25.9): dependencies: - '@babel/core': 7.25.9 - '@babel/plugin-syntax-async-generators': 7.8.4(@babel/core@7.25.9) - '@babel/plugin-syntax-bigint': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-class-properties': 7.12.13(@babel/core@7.25.9) - '@babel/plugin-syntax-class-static-block': 7.14.5(@babel/core@7.25.9) - '@babel/plugin-syntax-import-attributes': 7.25.9(@babel/core@7.25.9) - '@babel/plugin-syntax-import-meta': 7.10.4(@babel/core@7.25.9) - '@babel/plugin-syntax-json-strings': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-logical-assignment-operators': 7.10.4(@babel/core@7.25.9) - '@babel/plugin-syntax-nullish-coalescing-operator': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-numeric-separator': 
7.10.4(@babel/core@7.25.9) - '@babel/plugin-syntax-object-rest-spread': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-optional-catch-binding': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-optional-chaining': 7.8.3(@babel/core@7.25.9) - '@babel/plugin-syntax-private-property-in-object': 7.14.5(@babel/core@7.25.9) - '@babel/plugin-syntax-top-level-await': 7.14.5(@babel/core@7.25.9) + "@babel/core": 7.25.9 + "@babel/plugin-syntax-async-generators": 7.8.4(@babel/core@7.25.9) + "@babel/plugin-syntax-bigint": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-class-properties": 7.12.13(@babel/core@7.25.9) + "@babel/plugin-syntax-class-static-block": 7.14.5(@babel/core@7.25.9) + "@babel/plugin-syntax-import-attributes": 7.25.9(@babel/core@7.25.9) + "@babel/plugin-syntax-import-meta": 7.10.4(@babel/core@7.25.9) + "@babel/plugin-syntax-json-strings": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-logical-assignment-operators": 7.10.4(@babel/core@7.25.9) + "@babel/plugin-syntax-nullish-coalescing-operator": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-numeric-separator": 7.10.4(@babel/core@7.25.9) + "@babel/plugin-syntax-object-rest-spread": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-optional-catch-binding": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-optional-chaining": 7.8.3(@babel/core@7.25.9) + "@babel/plugin-syntax-private-property-in-object": 7.14.5(@babel/core@7.25.9) + "@babel/plugin-syntax-top-level-await": 7.14.5(@babel/core@7.25.9) babel-preset-jest@29.6.3(@babel/core@7.25.9): dependencies: - '@babel/core': 7.25.9 + "@babel/core": 7.25.9 babel-plugin-jest-hoist: 29.6.3 babel-preset-current-node-syntax: 1.1.0(@babel/core@7.25.9) @@ -2105,7 +3101,7 @@ snapshots: create-jest@29.7.0(@types/node@20.16.14)(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3)): dependencies: - '@jest/types': 29.6.3 + "@jest/types": 29.6.3 chalk: 4.1.2 exit: 0.1.2 graceful-fs: 4.2.11 @@ -2113,7 +3109,7 @@ snapshots: jest-util: 29.7.0 prompts: 2.4.2 transitivePeerDependencies: - - '@types/node' + - "@types/node" - babel-plugin-macros - supports-color - ts-node @@ -2180,7 +3176,7 @@ snapshots: expect@29.7.0: dependencies: - '@jest/expect-utils': 29.7.0 + "@jest/expect-utils": 29.7.0 jest-get-type: 29.6.3 jest-matcher-utils: 29.7.0 jest-message-util: 29.7.0 @@ -2289,9 +3285,9 @@ snapshots: istanbul-lib-instrument@5.2.1: dependencies: - '@babel/core': 7.25.9 - '@babel/parser': 7.25.9 - '@istanbuljs/schema': 0.1.3 + "@babel/core": 7.25.9 + "@babel/parser": 7.25.9 + "@istanbuljs/schema": 0.1.3 istanbul-lib-coverage: 3.2.2 semver: 6.3.1 transitivePeerDependencies: @@ -2299,9 +3295,9 @@ snapshots: istanbul-lib-instrument@6.0.3: dependencies: - '@babel/core': 7.25.9 - '@babel/parser': 7.25.9 - '@istanbuljs/schema': 0.1.3 + "@babel/core": 7.25.9 + "@babel/parser": 7.25.9 + "@istanbuljs/schema": 0.1.3 istanbul-lib-coverage: 3.2.2 semver: 7.6.3 transitivePeerDependencies: @@ -2341,11 +3337,11 @@ snapshots: jest-circus@29.7.0: dependencies: - '@jest/environment': 29.7.0 - '@jest/expect': 29.7.0 - '@jest/test-result': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/environment": 29.7.0 + "@jest/expect": 29.7.0 + "@jest/test-result": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 chalk: 4.1.2 co: 4.6.0 dedent: 1.5.3 @@ -2367,9 +3363,9 @@ snapshots: jest-cli@29.7.0(@types/node@20.16.14)(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3)): dependencies: - '@jest/core': 29.7.0(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3)) - '@jest/test-result': 29.7.0 - 
'@jest/types': 29.6.3 + "@jest/core": 29.7.0(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3)) + "@jest/test-result": 29.7.0 + "@jest/types": 29.6.3 chalk: 4.1.2 create-jest: 29.7.0(@types/node@20.16.14)(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3)) exit: 0.1.2 @@ -2379,16 +3375,16 @@ snapshots: jest-validate: 29.7.0 yargs: 17.7.2 transitivePeerDependencies: - - '@types/node' + - "@types/node" - babel-plugin-macros - supports-color - ts-node jest-config@29.7.0(@types/node@20.16.14)(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3)): dependencies: - '@babel/core': 7.25.9 - '@jest/test-sequencer': 29.7.0 - '@jest/types': 29.6.3 + "@babel/core": 7.25.9 + "@jest/test-sequencer": 29.7.0 + "@jest/types": 29.6.3 babel-jest: 29.7.0(@babel/core@7.25.9) chalk: 4.1.2 ci-info: 3.9.0 @@ -2409,7 +3405,7 @@ snapshots: slash: 3.0.0 strip-json-comments: 3.1.1 optionalDependencies: - '@types/node': 20.16.14 + "@types/node": 20.16.14 ts-node: 10.9.2(@types/node@20.16.14)(typescript@5.6.3) transitivePeerDependencies: - babel-plugin-macros @@ -2428,7 +3424,7 @@ snapshots: jest-each@29.7.0: dependencies: - '@jest/types': 29.6.3 + "@jest/types": 29.6.3 chalk: 4.1.2 jest-get-type: 29.6.3 jest-util: 29.7.0 @@ -2436,10 +3432,10 @@ snapshots: jest-environment-node@29.7.0: dependencies: - '@jest/environment': 29.7.0 - '@jest/fake-timers': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/environment": 29.7.0 + "@jest/fake-timers": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 jest-mock: 29.7.0 jest-util: 29.7.0 @@ -2447,9 +3443,9 @@ snapshots: jest-haste-map@29.7.0: dependencies: - '@jest/types': 29.6.3 - '@types/graceful-fs': 4.1.9 - '@types/node': 20.16.14 + "@jest/types": 29.6.3 + "@types/graceful-fs": 4.1.9 + "@types/node": 20.16.14 anymatch: 3.1.3 fb-watchman: 2.0.2 graceful-fs: 4.2.11 @@ -2475,9 +3471,9 @@ snapshots: jest-message-util@29.7.0: dependencies: - '@babel/code-frame': 7.25.9 - '@jest/types': 29.6.3 - '@types/stack-utils': 2.0.3 + "@babel/code-frame": 7.25.9 + "@jest/types": 29.6.3 + "@types/stack-utils": 2.0.3 chalk: 4.1.2 graceful-fs: 4.2.11 micromatch: 4.0.8 @@ -2487,8 +3483,8 @@ snapshots: jest-mock@29.7.0: dependencies: - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 jest-util: 29.7.0 jest-pnp-resolver@1.2.3(jest-resolve@29.7.0): @@ -2518,12 +3514,12 @@ snapshots: jest-runner@29.7.0: dependencies: - '@jest/console': 29.7.0 - '@jest/environment': 29.7.0 - '@jest/test-result': 29.7.0 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/console": 29.7.0 + "@jest/environment": 29.7.0 + "@jest/test-result": 29.7.0 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 chalk: 4.1.2 emittery: 0.13.1 graceful-fs: 4.2.11 @@ -2544,14 +3540,14 @@ snapshots: jest-runtime@29.7.0: dependencies: - '@jest/environment': 29.7.0 - '@jest/fake-timers': 29.7.0 - '@jest/globals': 29.7.0 - '@jest/source-map': 29.6.3 - '@jest/test-result': 29.7.0 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/environment": 29.7.0 + "@jest/fake-timers": 29.7.0 + "@jest/globals": 29.7.0 + "@jest/source-map": 29.6.3 + "@jest/test-result": 29.7.0 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 chalk: 4.1.2 cjs-module-lexer: 1.4.1 collect-v8-coverage: 1.0.2 @@ -2571,14 +3567,14 @@ snapshots: jest-snapshot@29.7.0: dependencies: - '@babel/core': 7.25.9 - '@babel/generator': 7.25.9 - '@babel/plugin-syntax-jsx': 
7.25.9(@babel/core@7.25.9) - '@babel/plugin-syntax-typescript': 7.25.9(@babel/core@7.25.9) - '@babel/types': 7.25.9 - '@jest/expect-utils': 29.7.0 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 + "@babel/core": 7.25.9 + "@babel/generator": 7.25.9 + "@babel/plugin-syntax-jsx": 7.25.9(@babel/core@7.25.9) + "@babel/plugin-syntax-typescript": 7.25.9(@babel/core@7.25.9) + "@babel/types": 7.25.9 + "@jest/expect-utils": 29.7.0 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 babel-preset-current-node-syntax: 1.1.0(@babel/core@7.25.9) chalk: 4.1.2 expect: 29.7.0 @@ -2596,8 +3592,8 @@ snapshots: jest-util@29.7.0: dependencies: - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 chalk: 4.1.2 ci-info: 3.9.0 graceful-fs: 4.2.11 @@ -2605,7 +3601,7 @@ snapshots: jest-validate@29.7.0: dependencies: - '@jest/types': 29.6.3 + "@jest/types": 29.6.3 camelcase: 6.3.0 chalk: 4.1.2 jest-get-type: 29.6.3 @@ -2614,9 +3610,9 @@ snapshots: jest-watcher@29.7.0: dependencies: - '@jest/test-result': 29.7.0 - '@jest/types': 29.6.3 - '@types/node': 20.16.14 + "@jest/test-result": 29.7.0 + "@jest/types": 29.6.3 + "@types/node": 20.16.14 ansi-escapes: 4.3.2 chalk: 4.1.2 emittery: 0.13.1 @@ -2625,19 +3621,19 @@ snapshots: jest-worker@29.7.0: dependencies: - '@types/node': 20.16.14 + "@types/node": 20.16.14 jest-util: 29.7.0 merge-stream: 2.0.0 supports-color: 8.1.1 jest@29.7.0(@types/node@20.16.14)(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3)): dependencies: - '@jest/core': 29.7.0(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3)) - '@jest/types': 29.6.3 + "@jest/core": 29.7.0(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3)) + "@jest/types": 29.6.3 import-local: 3.2.0 jest-cli: 29.7.0(@types/node@20.16.14)(ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3)) transitivePeerDependencies: - - '@types/node' + - "@types/node" - babel-plugin-macros - supports-color - ts-node @@ -2746,7 +3742,7 @@ snapshots: parse-json@5.2.0: dependencies: - '@babel/code-frame': 7.25.9 + "@babel/code-frame": 7.25.9 error-ex: 1.3.2 json-parse-even-better-errors: 2.3.1 lines-and-columns: 1.2.4 @@ -2795,7 +3791,7 @@ snapshots: pretty-format@29.7.0: dependencies: - '@jest/schemas': 29.6.3 + "@jest/schemas": 29.6.3 ansi-styles: 5.2.0 react-is: 18.3.1 @@ -2838,10 +3834,10 @@ snapshots: rrweb@2.0.0-alpha.17: dependencies: - '@rrweb/types': 2.0.0-alpha.17 - '@rrweb/utils': 2.0.0-alpha.17 - '@types/css-font-loading-module': 0.0.7 - '@xstate/fsm': 1.6.5 + "@rrweb/types": 2.0.0-alpha.17 + "@rrweb/utils": 2.0.0-alpha.17 + "@types/css-font-loading-module": 0.0.7 + "@xstate/fsm": 1.6.5 base64-arraybuffer: 1.0.2 mitt: 3.0.1 rrdom: 2.0.0-alpha.17 @@ -2917,7 +3913,7 @@ snapshots: test-exclude@6.0.0: dependencies: - '@istanbuljs/schema': 0.1.3 + "@istanbuljs/schema": 0.1.3 glob: 7.2.3 minimatch: 3.1.2 @@ -2941,19 +3937,19 @@ snapshots: typescript: 5.6.3 yargs-parser: 21.1.1 optionalDependencies: - '@babel/core': 7.25.9 - '@jest/transform': 29.7.0 - '@jest/types': 29.6.3 + "@babel/core": 7.25.9 + "@jest/transform": 29.7.0 + "@jest/types": 29.6.3 babel-jest: 29.7.0(@babel/core@7.25.9) ts-node@10.9.2(@types/node@20.16.14)(typescript@5.6.3): dependencies: - '@cspotcode/source-map-support': 0.8.1 - '@tsconfig/node10': 1.0.11 - '@tsconfig/node12': 1.0.11 - '@tsconfig/node14': 1.0.3 - '@tsconfig/node16': 1.0.4 - '@types/node': 20.16.14 + "@cspotcode/source-map-support": 0.8.1 + "@tsconfig/node10": 1.0.11 + "@tsconfig/node12": 1.0.11 + "@tsconfig/node14": 1.0.3 + "@tsconfig/node16": 1.0.4 + 
"@types/node": 20.16.14 acorn: 8.13.0 acorn-walk: 8.3.4 arg: 4.1.3 @@ -2984,8 +3980,8 @@ snapshots: v8-to-istanbul@9.3.0: dependencies: - '@jridgewell/trace-mapping': 0.3.25 - '@types/istanbul-lib-coverage': 2.0.6 + "@jridgewell/trace-mapping": 0.3.25 + "@types/istanbul-lib-coverage": 2.0.6 convert-source-map: 2.0.0 walker@1.0.8: diff --git a/js/sdk/src/baseClient.ts b/js/sdk/src/baseClient.ts new file mode 100644 index 000000000..f0b918fae --- /dev/null +++ b/js/sdk/src/baseClient.ts @@ -0,0 +1,171 @@ +import axios, { + AxiosInstance, + Method, + AxiosResponse, + AxiosRequestConfig, +} from "axios"; +import FormData from "form-data"; + +let fs: any; +if (typeof window === "undefined") { + import("fs").then((module) => { + fs = module; + }); +} + +function handleRequestError(response: AxiosResponse): void { + if (response.status < 400) { + return; + } + + let message: string; + const errorContent = response.data; + + if (typeof errorContent === "object" && errorContent !== null) { + message = + errorContent.message || + (errorContent.detail && errorContent.detail.message) || + (typeof errorContent.detail === "string" && errorContent.detail) || + JSON.stringify(errorContent); + } else { + message = String(errorContent); + } + + throw new Error(`Status ${response.status}: ${message}`); +} + +export abstract class BaseClient { + protected axiosInstance: AxiosInstance; + protected baseUrl: string; + protected accessToken: string | null; + protected refreshToken: string | null; + protected anonymousTelemetry: boolean; + + constructor(baseURL: string, prefix: string = "", anonymousTelemetry = true) { + this.baseUrl = `${baseURL}${prefix}`; + this.accessToken = null; + this.refreshToken = null; + this.anonymousTelemetry = anonymousTelemetry; + + this.axiosInstance = axios.create({ + baseURL: this.baseUrl, + headers: { + "Content-Type": "application/json", + }, + }); + } + + protected async _makeRequest( + method: Method, + endpoint: string, + options: any = {}, + version: "v2" | "v3" = "v2", + ): Promise { + const url = `/${version}/${endpoint}`; + const config: AxiosRequestConfig = { + method, + url, + headers: { ...options.headers }, + params: options.params, + ...options, + responseType: options.responseType || "json", + }; + + config.headers = config.headers || {}; + + if (options.params) { + config.paramsSerializer = (params) => { + return Object.entries(params) + .map(([key, value]) => { + if (Array.isArray(value)) { + return value + .map( + (v) => `${encodeURIComponent(key)}=${encodeURIComponent(v)}`, + ) + .join("&"); + } + return `${encodeURIComponent(key)}=${encodeURIComponent(String(value))}`; + }) + .join("&"); + }; + } + + if (options.data) { + if (typeof FormData !== "undefined" && options.data instanceof FormData) { + config.data = options.data; + delete config.headers["Content-Type"]; + } else if (typeof options.data === "object") { + if ( + config.headers["Content-Type"] === "application/x-www-form-urlencoded" + ) { + config.data = Object.keys(options.data) + .map( + (key) => + `${encodeURIComponent(key)}=${encodeURIComponent(options.data[key])}`, + ) + .join("&"); + } else { + config.data = JSON.stringify(options.data); + if (method !== "DELETE") { + config.headers["Content-Type"] = "application/json"; + } else { + config.headers["Content-Type"] = "application/json"; + config.data = JSON.stringify(options.data); + } + } + } else { + config.data = options.data; + } + } + + if ( + this.accessToken && + !["register", "login", "verify_email", "health"].includes(endpoint) + ) { + 
config.headers.Authorization = `Bearer ${this.accessToken}`; + } + + if (options.responseType === "stream") { + const fetchHeaders: Record = {}; + Object.entries(config.headers).forEach(([key, value]) => { + if (typeof value === "string") { + fetchHeaders[key] = value; + } + }); + const response = await fetch(`${this.baseUrl}/${version}/${endpoint}`, { + method, + headers: fetchHeaders, + body: config.data, + }); + + if (!response.ok) { + throw new Error(`HTTP error! status: ${response.status}`); + } + + return response.body as unknown as T; + } + + try { + const response = await this.axiosInstance.request(config); + return options.returnFullResponse + ? (response as any as T) + : response.data; + } catch (error) { + if (axios.isAxiosError(error) && error.response) { + handleRequestError(error.response); + } + throw error; + } + } + + protected _ensureAuthenticated(): void { + // if (!this.accessToken) { + // throw new Error("Not authenticated. Please login first."); + // } + } + + setTokens(accessToken: string, refreshToken: string): void { + this.accessToken = accessToken; + this.refreshToken = refreshToken; + } +} diff --git a/js/sdk/src/index.ts b/js/sdk/src/index.ts index 0f4285074..4a149f935 100644 --- a/js/sdk/src/index.ts +++ b/js/sdk/src/index.ts @@ -1,3 +1,4 @@ export { r2rClient } from "./r2rClient"; export * from "./models"; +export * from "./types"; export { feature, initializeTelemetry } from "./feature"; diff --git a/js/sdk/src/models.tsx b/js/sdk/src/models.tsx index 1684ba14d..f0d467a27 100644 --- a/js/sdk/src/models.tsx +++ b/js/sdk/src/models.tsx @@ -1,6 +1,6 @@ export interface TokenInfo { token: string; - token_type: string; + tokenType: string; } export interface LoginResponse { @@ -20,52 +20,50 @@ export interface RefreshTokenResponse { export interface GenerationConfig { model?: string; temperature?: number; - top_p?: number; - max_tokens_to_sample?: number; + topP?: number; + maxTokensToSample?: number; stream?: boolean; functions?: Array>; tools?: Array>; - add_generation_kwargs?: Record; - api_base?: string; - response_format?: string; + addGenerationKwargs?: Record; + apiBase?: string; + responseFormat?: string; } export interface HybridSearchSettings { - full_text_weight: number; - semantic_weight: number; - full_text_limit: number; - rrf_k: number; + fullTextWeight: number; + semanticWeight: number; + fullTextLimit: number; + rrfK: number; } -export interface VectorSearchSettings { - use_vector_search?: boolean; - use_hybrid_search?: boolean; +export interface ChunkSearchSettings { + useVectorSearch?: boolean; + useHybridSearch?: boolean; filters?: Record; - search_limit?: number; + searchLimit?: number; offset?: number; - selected_collection_ids?: string[]; - index_measure: IndexMeasure; - include_values?: boolean; - include_metadatas?: boolean; + selectedCollectionIds?: string[]; + indexMeasure: IndexMeasure; + includeScores?: boolean; + includeMetadatas?: boolean; probes?: number; - ef_search?: number; - hybrid_search_settings?: HybridSearchSettings; - search_strategy?: string; + efSearch?: number; + hybridSearchSettings?: HybridSearchSettings; + searchStrategy?: string; } export interface KGSearchSettings { - use_kg_search?: boolean; + useKgSearch?: boolean; filters?: Record; - selected_collection_ids?: string[]; - graphrag_map_system_prompt?: string; - kg_search_type?: "local"; - kg_search_level?: number | null; - generation_config?: GenerationConfig; - // entity_types?: any[]; - // relationships?: any[]; - max_community_description_length?: number; - 
max_llm_queries_for_global_search?: number; - local_search_limits?: Record; + selectedCollectionIds?: string[]; + graphragMapSystemPrompt?: string; + kgSearchType?: "local"; + kgSearchLevel?: number | null; + generationConfig?: GenerationConfig; + maxCommunityDescriptionLength?: number; + maxLlmQueriesForGlobalSearch?: number; + localSearchLimits?: Record; } export enum KGRunType { @@ -74,27 +72,27 @@ export enum KGRunType { } export interface KGCreationSettings { - kg_triples_extraction_prompt?: string; - kg_entity_description_prompt?: string; - force_kg_creation?: boolean; - entity_types?: string[]; - relation_types?: string[]; - extractions_merge_count?: number; - max_knowledge_triples?: number; - max_description_input_length?: number; - generation_config?: GenerationConfig; + kgRelationshipsExtractionPrompt?: string; + kgEntityDescriptionPrompt?: string; + forceKgCreation?: boolean; + entityTypes?: string[]; + relationTypes?: string[]; + extractionsMergeCount?: number; + maxKnowledgeRelationships?: number; + maxDescriptionInputLength?: number; + generationConfig?: GenerationConfig; } export interface KGEnrichmentSettings { - force_kg_enrichment?: boolean; - community_reports_prompt?: string; - max_summary_input_length?: number; - generation_config?: GenerationConfig; - leiden_params?: Record; + forceKgEnrichment?: boolean; + communityReportsPrompt?: string; + maxSummaryInputLength?: number; + generationConfig?: GenerationConfig; + leidenParams?: Record; } export interface KGEntityDeduplicationSettings { - kg_entity_deduplication_type?: KGEntityDeduplicationType; + kgEntityDeduplicationType?: KGEntityDeduplicationType; } export enum KGEntityDeduplicationType { @@ -111,12 +109,7 @@ export interface KGLocalSearchResult { export interface KGGlobalSearchResult { query: string; - search_result: string[]; -} - -export interface KGSearchResult { - local_result?: KGLocalSearchResult; - global_result?: KGGlobalSearchResult; + searchResult: string[]; } export interface Message { @@ -125,7 +118,7 @@ export interface Message { } export interface R2RDocumentChunksRequest { - document_id: string; + documentId: string; } export enum IndexMeasure { diff --git a/js/sdk/src/r2rClient.ts b/js/sdk/src/r2rClient.ts index 3e3a9e56f..3f9ed4bb4 100644 --- a/js/sdk/src/r2rClient.ts +++ b/js/sdk/src/r2rClient.ts @@ -1,11 +1,19 @@ -import axios, { - AxiosInstance, - Method, - AxiosResponse, - AxiosRequestConfig, -} from "axios"; +import axios, { Method } from "axios"; import FormData from "form-data"; +import { BaseClient } from "./baseClient"; + +import { ChunksClient } from "./v3/clients/chunks"; +import { CollectionsClient } from "./v3/clients/collections"; +import { ConversationsClient } from "./v3/clients/conversations"; +import { DocumentsClient } from "./v3/clients/documents"; +import { GraphsClient } from "./v3/clients/graphs"; +import { IndiciesClient } from "./v3/clients/indices"; +import { PromptsClient } from "./v3/clients/prompts"; +import { RetrievalClient } from "./v3/clients/retrieval"; +import { SystemClient } from "./v3/clients/system"; +import { UsersClient } from "./v3/clients/users"; + let fs: any; if (typeof window === "undefined") { import("fs").then((module) => { @@ -19,7 +27,7 @@ import { TokenInfo, Message, RefreshTokenResponse, - VectorSearchSettings, + ChunkSearchSettings, KGSearchSettings, KGRunType, KGCreationSettings, @@ -29,51 +37,31 @@ import { RawChunk, } from "./models"; -function handleRequestError(response: AxiosResponse): void { - if (response.status < 400) { - return; - } - 
- let message: string; - const errorContent = response.data; - - if ( - typeof errorContent === "object" && - errorContent !== null && - "detail" in errorContent - ) { - const { detail } = errorContent; - if (typeof detail === "object" && detail !== null) { - message = (detail as { message?: string }).message || response.statusText; - } else { - message = String(detail); - } - } else { - message = String(errorContent); - } - - throw new Error(`Status ${response.status}: ${message}`); -} - -export class r2rClient { - private axiosInstance: AxiosInstance; - private baseUrl: string; - private anonymousTelemetry: boolean; - - // Authorization tokens - private accessToken: string | null; - private refreshToken: string | null; - - constructor( - baseURL: string, - prefix: string = "/v2", - anonymousTelemetry = true, - ) { - this.baseUrl = `${baseURL}${prefix}`; - this.anonymousTelemetry = anonymousTelemetry; - - this.accessToken = null; - this.refreshToken = null; +export class r2rClient extends BaseClient { + public readonly chunks: ChunksClient; + public readonly collections: CollectionsClient; + public readonly conversations: ConversationsClient; + public readonly documents: DocumentsClient; + public readonly graphs: GraphsClient; + public readonly indices: IndiciesClient; + public readonly prompts: PromptsClient; + public readonly retrieval: RetrievalClient; + public readonly system: SystemClient; + public readonly users: UsersClient; + + constructor(baseURL: string, anonymousTelemetry = true) { + super(baseURL, "", anonymousTelemetry); + + this.chunks = new ChunksClient(this); + this.collections = new CollectionsClient(this); + this.conversations = new ConversationsClient(this); + this.documents = new DocumentsClient(this); + this.graphs = new GraphsClient(this); + this.indices = new IndiciesClient(this); + this.prompts = new PromptsClient(this); + this.retrieval = new RetrievalClient(this); + this.system = new SystemClient(this); + this.users = new UsersClient(this); initializeTelemetry(this.anonymousTelemetry); @@ -108,117 +96,21 @@ export class r2rClient { }); } - setTokens(accessToken: string, refreshToken: string): void { - this.accessToken = accessToken; - this.refreshToken = refreshToken; - } - - private async _makeRequest( + public makeRequest( method: Method, endpoint: string, options: any = {}, ): Promise { - const url = `${endpoint}`; - const config: AxiosRequestConfig = { - method, - url, - headers: { ...options.headers }, - params: options.params, - ...options, - responseType: options.responseType || "json", - }; - - config.headers = config.headers || {}; - - if (options.params) { - config.paramsSerializer = (params) => { - return Object.entries(params) - .map(([key, value]) => { - if (Array.isArray(value)) { - return value - .map( - (v) => `${encodeURIComponent(key)}=${encodeURIComponent(v)}`, - ) - .join("&"); - } - return `${encodeURIComponent(key)}=${encodeURIComponent(String(value))}`; - }) - .join("&"); - }; - } - - if (options.data) { - if (typeof FormData !== "undefined" && options.data instanceof FormData) { - config.data = options.data; - delete config.headers["Content-Type"]; - } else if (typeof options.data === "object") { - if ( - config.headers["Content-Type"] === "application/x-www-form-urlencoded" - ) { - config.data = Object.keys(options.data) - .map( - (key) => - `${encodeURIComponent(key)}=${encodeURIComponent(options.data[key])}`, - ) - .join("&"); - } else { - config.data = JSON.stringify(options.data); - if (method !== "DELETE") { - 
config.headers["Content-Type"] = "application/json"; - } else { - config.headers["Content-Type"] = "application/json"; - config.data = JSON.stringify(options.data); - } - } - } else { - config.data = options.data; - } - } - - if ( - this.accessToken && - !["register", "login", "verify_email", "health"].includes(endpoint) - ) { - config.headers.Authorization = `Bearer ${this.accessToken}`; - } - - if (options.responseType === "stream") { - const fetchHeaders: Record = {}; - Object.entries(config.headers).forEach(([key, value]) => { - if (typeof value === "string") { - fetchHeaders[key] = value; - } - }); - const response = await fetch(`${this.baseUrl}/${endpoint}`, { - method, - headers: fetchHeaders, - body: config.data, - }); - - if (!response.ok) { - throw new Error(`HTTP error! status: ${response.status}`); - } - - return response.body as unknown as T; - } + return this._makeRequest(method, endpoint, options, "v3"); + } - try { - const response = await this.axiosInstance.request(config); - return options.returnFullResponse - ? (response as any as T) - : response.data; - } catch (error) { - if (axios.isAxiosError(error) && error.response) { - handleRequestError(error.response); - } - throw error; - } + public getRefreshToken(): string | null { + return this.refreshToken; } - private _ensureAuthenticated(): void { - // if (!this.accessToken) { - // throw new Error("Not authenticated. Please login first."); - // } + setTokens(accessToken: string | null, refreshToken: string | null): void { + this.accessToken = accessToken; + this.refreshToken = refreshToken; } // ----------------------------------------------------------------------------- @@ -231,6 +123,7 @@ export class r2rClient { * @param email The email of the user to register. * @param password The password of the user to register. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.users.register` instead. */ @feature("register") @@ -244,6 +137,7 @@ export class r2rClient { * Verifies the email of a user with the given verification code. * @param verification_code The verification code to verify the email with. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.users.verifyEmail` instead. */ @feature("verifyEmail") async verifyEmail(email: string, verification_code: string): Promise { @@ -257,6 +151,7 @@ export class r2rClient { * @param email The email of the user to log in. * @param password The password of the user to log in. * @returns A promise that resolves to the response from the server containing the access and refresh tokens. + * @deprecated Use `client.users.login` instead. */ @feature("login") async login( @@ -285,19 +180,25 @@ export class r2rClient { return response.results; } + /** + * Logs in a user using a token. + * @param accessToken The access token to use for authentication. + * @returns A promise that resolves to the response from the server containing the access token. + * @deprecated Use `client.users.loginWithToken` instead. + */ @feature("loginWithToken") async loginWithToken( accessToken: string, - ): Promise<{ access_token: TokenInfo }> { + ): Promise<{ accessToken: TokenInfo }> { this.accessToken = accessToken; try { await this._makeRequest("GET", "user"); return { - access_token: { + accessToken: { token: accessToken, - token_type: "access_token", + tokenType: "access_token", }, }; } catch (error) { @@ -309,6 +210,7 @@ export class r2rClient { /** * Logs out the currently authenticated user. 
* @returns A promise that resolves to the response from the server. + * @deprecated Use `client.users.logout` instead. */ @feature("logout") async logout(): Promise { @@ -323,6 +225,7 @@ export class r2rClient { /** * Retrieves the user information for the currently authenticated user. * @returns A promise that resolves to the response from the server containing the user information. + * @deprecated Use `client.users.list` instead. */ @feature("user") async user(): Promise { @@ -337,6 +240,7 @@ export class r2rClient { * @param bio The updated bio for the user. * @param profilePicture The updated profile picture URL for the user. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.users.update` instead. */ @feature("updateUser") async updateUser( @@ -372,6 +276,7 @@ export class r2rClient { /** * Refreshes the access token for the currently authenticated user. * @returns A promise that resolves to the response from the server containing the new access and refresh tokens. + * @deprecated Use `client.users.refreshAccessToken` instead. */ async refreshAccessToken(): Promise { if (!this.refreshToken) { @@ -404,6 +309,7 @@ export class r2rClient { * @param current_password The current password of the user. * @param new_password The new password to set for the user. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.users.changePassword` instead. */ @feature("changePassword") async changePassword( @@ -424,6 +330,7 @@ export class r2rClient { * Requests a password reset for the user with the given email. * @param email The email of the user to request a password reset for. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.users.requestPasswordReset` instead. */ @feature("requestPasswordReset") async requestPasswordReset(email: string): Promise { @@ -440,6 +347,7 @@ export class r2rClient { * @param resetToken The reset token to confirm the password reset with. * @param newPassword The new password to set for the user. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.users.resetPassword` instead. */ @feature("confirmPasswordReset") async confirmPasswordReset( @@ -456,6 +364,7 @@ export class r2rClient { * @param user_id The ID of the user to delete, defaults to the currently authenticated user. * @param password The password of the user to delete. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.users.delete` instead. */ @feature("deleteUser") async deleteUser(userId: string, password?: string): Promise { @@ -488,6 +397,7 @@ export class r2rClient { * @param files * @param options * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.documents.create` instead. */ @feature("ingestFiles") async ingestFiles( @@ -600,6 +510,7 @@ export class r2rClient { * @param files * @param options * @returns + * @deprecated Use `client.documents.update` instead. */ @feature("updateFiles") async updateFiles( @@ -682,26 +593,14 @@ export class r2rClient { } /** - * Update the metadata of an existing document. - * @param documentId The ID of the document to update. - * @param metadata The new metadata to merge with existing metadata. - * @returns A promise that resolves to the response from the server. + * + * @param chunks + * @param documentId + * @param metadata + * @param run_with_orchestration + * @deprecated use `client.chunks.create` instead. 
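+   * For example, the v3 replacement added in this change can be called roughly as `await client.chunks.create({ chunks })`.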
+ * @returns */ - @feature("updateDocumentMetadata") - async updateDocumentMetadata( - documentId: string, - metadata: Record, - ): Promise> { - this._ensureAuthenticated(); - return await this._makeRequest( - "POST", - `update_document_metadata/${documentId}`, - { - data: metadata, - }, - ); - } - @feature("ingestChunks") async ingestChunks( chunks: RawChunk[], @@ -727,6 +626,17 @@ export class r2rClient { }); } + /** + * Update the content of an existing chunk. + * + * @param documentId - The ID of the document containing the chunk. + * @param extractionId - The ID of the chunk to update. + * @param text - The new text content of the chunk. + * @param metadata - Optional metadata dictionary for the chunk. + * @param runWithOrchestration - Whether to run the update through orchestration. + * @returns Update results containing processed, failed, and skipped documents. + * @deprecated Use `client.chunks.update` instead. + */ @feature("updateChunk") async updateChunk( documentId: string, @@ -735,16 +645,6 @@ export class r2rClient { metadata?: Record, runWithOrchestration?: boolean, ): Promise> { - /** - * Update the content of an existing chunk. - * - * @param documentId - The ID of the document containing the chunk. - * @param extractionId - The ID of the chunk to update. - * @param text - The new text content of the chunk. - * @param metadata - Optional metadata dictionary for the chunk. - * @param runWithOrchestration - Whether to run the update through orchestration. - * @returns Update results containing processed, failed, and skipped documents. - */ this._ensureAuthenticated(); const data: Record = { @@ -770,6 +670,7 @@ export class r2rClient { * Create a vector index for similarity search. * @param options The options for creating the vector index * @returns Promise resolving to the creation response + * @deprecated Use `client.indices.create` instead. */ @feature("createVectorIndex") async createVectorIndex(options: { @@ -807,6 +708,7 @@ export class r2rClient { * List existing vector indices for a table. * @param options The options for listing vector indices * @returns Promise resolving to the list of indices + * @deprecated Use `client.indices.list` instead. */ @feature("listVectorIndices") async listVectorIndices(options: { @@ -826,6 +728,7 @@ export class r2rClient { * Delete a vector index from a table. * @param options The options for deleting the vector index * @returns Promise resolving to the deletion response + * @deprecated Use `client.indices.delete` instead. */ @feature("deleteVectorIndex") async deleteVectorIndex(options: { @@ -858,6 +761,7 @@ export class r2rClient { /** * Check the health of the R2R deployment. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.system.health` instead. */ async health(): Promise { return await this._makeRequest("GET", "health"); @@ -866,6 +770,7 @@ export class r2rClient { /** * Get statistics about the server, including the start time, uptime, CPU usage, and memory usage. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.system.status` instead. */ @feature("serverStats") async serverStats(): Promise { @@ -879,6 +784,7 @@ export class r2rClient { * @param template The new template for the prompt. * @param input_types The new input types for the prompt. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.prompts.update` instead. 
*/ @feature("updatePrompt") async updatePrompt( @@ -910,6 +816,7 @@ export class r2rClient { * @param name The name of the prompt. * @param template The template for the prompt. * @param input_types The input types for the prompt. + * @deprecated Use `client.prompts.create` instead. */ @feature("addPrompt") async addPrompt( @@ -934,6 +841,7 @@ export class r2rClient { * @param name The name of the prompt to retrieve. * @param inputs Inputs for the prompt. * @param prompt_override Override for the prompt template. + * @deprecated Use `client.prompts.retrieve` instead. * @returns */ @feature("getPrompt") @@ -958,6 +866,7 @@ export class r2rClient { /** * Get all prompts from the system. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.prompts.list` instead. */ @feature("getAllPrompts") async getAllPrompts(): Promise> { @@ -969,6 +878,7 @@ export class r2rClient { * Delete a prompt from the system. * @param prompt_name The name of the prompt to delete. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.prompts.delete` instead. */ @feature("deletePrompt") async deletePrompt(prompt_name: string): Promise> { @@ -981,6 +891,7 @@ export class r2rClient { * @param filter_criteria The filter criteria to use. * @param analysis_types The types of analysis to perform. * @returns A promise that resolves to the response from the server. + * @deprecated This method is deprecated. New, improved analytics features will be added in a future release. */ @feature("analytics") async analytics( @@ -1013,6 +924,7 @@ export class r2rClient { * @param run_type_filter The run type to filter by. * @param max_runs Specifies the maximum number of runs to return. Values outside the range of 1 to 1000 will be adjusted to the nearest valid value with a default of 100. * @returns + * @deprecated Use `client.system.logs` instead. */ @feature("logs") async logs(run_type_filter?: string, max_runs?: number): Promise { @@ -1034,6 +946,7 @@ export class r2rClient { /** * Get the configuration settings for the app. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.system.settings` instead. */ @feature("appSettings") async appSettings(): Promise { @@ -1048,6 +961,7 @@ export class r2rClient { * * @param offset The offset to start listing users from. * @param limit The maximum number of users to return. * @returns + * @deprecated Use `client.users.list` instead. */ @feature("usersOverview") async usersOverview( @@ -1079,6 +993,7 @@ export class r2rClient { * Delete data from the database given a set of filters. * @param filters The filters to delete by. * @returns The results of the deletion. + * @deprecated Use `client..delete` instead for the specific object you want to delete. */ @feature("delete") async delete(filters: { [key: string]: any }): Promise { @@ -1095,6 +1010,7 @@ export class r2rClient { * Download the raw file associated with a document. * @param documentId The ID of the document to retrieve. * @returns A promise that resolves to a Blob representing the PDF. + * @deprecated Use `client.documents.download` instead. */ @feature("downloadFile") async downloadFile(documentId: string): Promise { @@ -1109,6 +1025,7 @@ export class r2rClient { * @param offset The offset to start listing documents from. * @param limit The maximum number of documents to return. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.documents.list` instead. 
*/ @feature("documentsOverview") async documentsOverview( @@ -1136,6 +1053,7 @@ export class r2rClient { * Get the chunks for a document. * @param document_id The ID of the document to get the chunks for. * @returns A promise that resolves to the response from the server. + * @deprecated Use `client.documents.listChunks` instead. */ @feature("documentChunks") async documentChunks( @@ -1166,6 +1084,7 @@ export class r2rClient { * @param collectionIds List of collection IDs to get an overview for. * @param limit The maximum number of collections to return. * @param offset The offset to start listing collections from. + * @deprecated use `client.collections.list` instead * @returns */ @feature("collectionsOverview") @@ -1195,6 +1114,7 @@ export class r2rClient { * @param name The name of the collection. * @param description The description of the collection. * @returns + * @deprecated use `client.collections.create` instead */ @feature("createCollection") async createCollection( @@ -1215,6 +1135,7 @@ export class r2rClient { * Get a collection by its ID. * @param collectionId The ID of the collection to get. * @returns A promise that resolves to the response from the server. + * @deprecated use `client.collections.retrieve` instead */ @feature("getCollection") async getCollection(collectionId: string): Promise> { @@ -1231,6 +1152,7 @@ export class r2rClient { * @param name The new name for the collection. * @param description The new description of the collection. * @returns A promise that resolves to the response from the server. + * @deprecated use `client.collections.update` instead */ @feature("updateCollection") async updateCollection( @@ -1258,6 +1180,7 @@ export class r2rClient { * Delete a collection by its ID. * @param collectionId The ID of the collection to delete. * @returns A promise that resolves to the response from the server. + * @deprecated use `client.collections.delete` instead */ @feature("deleteCollection") async deleteCollection(collectionId: string): Promise> { @@ -1273,6 +1196,7 @@ export class r2rClient { * @param offset The offset to start listing collections from. * @param limit The maximum numberof collections to return. * @returns + * @deprecated use `client.collections.list` instead */ @feature("listCollections") async listCollections( @@ -1297,6 +1221,7 @@ export class r2rClient { * @param userId The ID of the user to add. * @param collectionId The ID of the collection to add the user to. * @returns A promise that resolves to the response from the server. + * @deprecated use `client.collections.addUser` instead */ @feature("addUserToCollection") async addUserToCollection( @@ -1314,6 +1239,7 @@ export class r2rClient { * @param userId The ID of the user to remove. * @param collectionId The ID of the collection to remove the user from. * @returns + * @deprecated use `client.collections.removeUser` instead */ @feature("removeUserFromCollection") async removeUserFromCollection( @@ -1332,6 +1258,7 @@ export class r2rClient { * @param offset The offset to start listing users from. * @param limit The maximum number of users to return. * @returns A promise that resolves to the response from the server. + * @deprecated use `client.collections.listUsers` instead */ @feature("getUsersInCollection") async getUsersInCollection( @@ -1360,6 +1287,7 @@ export class r2rClient { * Get all collections that a user is a member of. * @param userId The ID of the user to get collections for. * @returns A promise that resolves to the response from the server. 
+ * @deprecated use `client.users.listCollections` instead */ @feature("getCollectionsForUser") async getCollectionsForUser( @@ -1389,6 +1317,7 @@ export class r2rClient { * @param document_id The ID of the document to assign. * @param collection_id The ID of the collection to assign the document to. * @returns + * @deprecated use `client.collections.addDocument` instead */ @feature("assignDocumentToCollection") async assignDocumentToCollection( @@ -1407,6 +1336,7 @@ export class r2rClient { * @param document_id The ID of the document to remove. * @param collection_id The ID of the collection to remove the document from. * @returns A promise that resolves to the response from the server. + * @deprecated use `client.collections.removeDocument` instead */ @feature("removeDocumentFromCollection") async removeDocumentFromCollection( @@ -1424,6 +1354,7 @@ export class r2rClient { * Get all collections that a document is assigned to. * @param documentId The ID of the document to get collections for. * @returns + * @deprecated use `client.collections.listDocuments` instead */ @feature("getDocumentCollections") async getDocumentCollections( @@ -1454,6 +1385,7 @@ export class r2rClient { * @param offset The offset to start listing documents from. * @param limit The maximum number of documents to return. * @returns A promise that resolves to the response from the server. + * @deprecated use `client.collections.listDocuments` instead */ @feature("getDocumentsInCollection") async getDocumentsInCollection( @@ -1482,6 +1414,7 @@ export class r2rClient { * Get an overview of existing conversations. * @param limit The maximum number of conversations to return. * @param offset The offset to start listing conversations from. + * @deprecated use `client.conversations.list` instead * @returns */ @feature("conversationsOverview") @@ -1510,6 +1443,7 @@ export class r2rClient { * Get a conversation by its ID. * @param conversationId The ID of the conversation to get. * @param branchId The ID of the branch (optional). + * @deprecated use `client.conversations.retrieve` instead * @returns A promise that resolves to the response from the server. */ @feature("getConversation") @@ -1527,6 +1461,7 @@ export class r2rClient { /** * Create a new conversation. + * @deprecated use `client.conversations.create` instead * @returns A promise that resolves to the response from the server. */ @feature("createConversation") @@ -1539,6 +1474,7 @@ export class r2rClient { * Add a message to an existing conversation. * @param conversationId * @param message + * @deprecated use `client.conversations.addMessage` instead * @returns */ @feature("addMessage") @@ -1563,6 +1499,7 @@ export class r2rClient { * Update a message in an existing conversation. * @param message_id The ID of the message to update. * @param message The updated message. + * @deprecated use `client.conversations.updateMessage` instead * @returns A promise that resolves to the response from the server. */ @feature("updateMessage") @@ -1596,6 +1533,7 @@ export class r2rClient { /** * Get an overview of branches in a conversation. * @param conversationId The ID of the conversation to get branches for. + * @deprecated use `client.conversations.listBranches` instead * @returns A promise that resolves to the response from the server. */ @feature("branchesOverview") @@ -1648,6 +1586,7 @@ export class r2rClient { /** * Delete a conversation by its ID. * @param conversationId The ID of the conversation to delete. 
+ * @deprecated use `client.conversations.delete` instead * @returns A promise that resolves to the response from the server. */ @feature("deleteConversation") @@ -1670,20 +1609,20 @@ export class r2rClient { * * @param collection_id The ID of the collection to create the graph for. * @param run_type The type of run to perform. - * @param kg_creation_settings Settings for the graph creation process. + * @param graph_creation_settings Settings for the graph creation process. */ @feature("createGraph") async createGraph( collection_id?: string, run_type?: KGRunType, - kg_creation_settings?: KGCreationSettings | Record, + graph_creation_settings?: KGCreationSettings | Record, ): Promise> { this._ensureAuthenticated(); const json_data: Record = { collection_id, run_type, - kg_creation_settings, + graph_creation_settings, }; Object.keys(json_data).forEach( @@ -1698,20 +1637,20 @@ export class r2rClient { * * @param collection_id The ID of the collection to enrich the graph for. * @param run_type The type of run to perform. - * @param kg_enrichment_settings Settings for the graph enrichment process. + * @param graph_enrichment_settings Settings for the graph enrichment process. */ @feature("enrichGraph") async enrichGraph( collection_id?: string, run_type?: KGRunType, - kg_enrichment_settings?: KGEnrichmentSettings | Record, + graph_enrichment_settings?: KGEnrichmentSettings | Record, ): Promise { this._ensureAuthenticated(); const json_data: Record = { collection_id, run_type, - kg_enrichment_settings, + graph_enrichment_settings, }; Object.keys(json_data).forEach( @@ -1762,13 +1701,13 @@ export class r2rClient { } /** - * Retrieve triples from the knowledge graph. + * Retrieve relationships from the knowledge graph. * @returns A promise that resolves to the response from the server. * @param collection_id The ID of the collection to retrieve entities for. * @param offset The offset for pagination. * @param limit The limit for pagination. * @param entity_level The level of entity to filter by. - * @param triple_ids Triple IDs to filter by. + * @param relationship_ids Relationship IDs to filter by. */ @feature("getTriples") async getTriples( @@ -1776,7 +1715,7 @@ export class r2rClient { offset?: number, limit?: number, entity_level?: string, - triple_ids?: string[], + relationship_ids?: string[], ): Promise { this._ensureAuthenticated(); @@ -1793,8 +1732,8 @@ export class r2rClient { if (entity_level !== undefined) { params.entity_level = entity_level; } - if (triple_ids !== undefined) { - params.entity_ids = triple_ids; + if (relationship_ids !== undefined) { + params.entity_ids = relationship_ids; } return this._makeRequest("GET", `triples`, { params }); @@ -1806,7 +1745,7 @@ export class r2rClient { * @param offset The offset for pagination. * @param limit The limit for pagination. * @param levels Levels to filter by. - * @param community_numbers Community numbers to filter by. + * @param community_ids Community numbers to filter by. 
* @returns */ @feature("getCommunities") @@ -1815,7 +1754,7 @@ export class r2rClient { offset?: number, limit?: number, levels?: number, - community_numbers?: number[], + community_ids?: number[], ): Promise { this._ensureAuthenticated(); @@ -1832,8 +1771,8 @@ export class r2rClient { if (levels !== undefined) { params.levels = levels; } - if (community_numbers !== undefined) { - params.community_numbers = community_numbers; + if (community_ids !== undefined) { + params.community_ids = community_ids; } return this._makeRequest("GET", `communities`, { params }); @@ -1927,7 +1866,7 @@ export class r2rClient { @feature("searchDocuments") async searchDocuments( query: string, - vector_search_settings?: VectorSearchSettings | Record, + vector_search_settings?: ChunkSearchSettings | Record, ): Promise { this._ensureAuthenticated(); const json_data: Record = { @@ -1948,21 +1887,22 @@ export class r2rClient { * Conduct a vector and/or KG search. * @param query The query to search for. * @param vector_search_settings Vector search settings. - * @param kg_search_settings KG search settings. + * @param graph_search_settings KG search settings. + * @deprecated use `client.retrieval.search` instead * @returns */ @feature("search") async search( query: string, - vector_search_settings?: VectorSearchSettings | Record, - kg_search_settings?: KGSearchSettings | Record, + vector_search_settings?: ChunkSearchSettings | Record, + graph_search_settings?: KGSearchSettings | Record, ): Promise { this._ensureAuthenticated(); const json_data: Record = { query, vector_search_settings, - kg_search_settings, + graph_search_settings, }; Object.keys(json_data).forEach( @@ -1976,17 +1916,18 @@ export class r2rClient { * Conducts a Retrieval Augmented Generation (RAG) search with the given query. * @param query The query to search for. * @param vector_search_settings Vector search settings. - * @param kg_search_settings KG search settings. + * @param graph_search_settings KG search settings. * @param rag_generation_config RAG generation configuration. * @param task_prompt_override Task prompt override. * @param include_title_if_available Include title if available. + * @deprecated use `client.retrieval.search` instead * @returns A promise that resolves to the response from the server. */ @feature("rag") async rag( query: string, - vector_search_settings?: VectorSearchSettings | Record, - kg_search_settings?: KGSearchSettings | Record, + vector_search_settings?: ChunkSearchSettings | Record, + graph_search_settings?: KGSearchSettings | Record, rag_generation_config?: GenerationConfig | Record, task_prompt_override?: string, include_title_if_available?: boolean, @@ -1996,7 +1937,7 @@ export class r2rClient { const json_data: Record = { query, vector_search_settings, - kg_search_settings, + graph_search_settings, rag_generation_config, task_prompt_override, include_title_if_available, @@ -2034,19 +1975,20 @@ export class r2rClient { * @param messages The messages to send to the agent. * @param rag_generation_config RAG generation configuration. * @param vector_search_settings Vector search settings. - * @param kg_search_settings KG search settings. + * @param graph_search_settings KG search settings. * @param task_prompt_override Task prompt override. * @param include_title_if_available Include title if available. * @param conversation_id The ID of the conversation, if not a new conversation. * @param branch_id The ID of the branch to use, if not a new branch. 
* @returns A promise that resolves to the response from the server. + * @deprecated use `client.retrieval.agent` instead */ @feature("agent") async agent( messages: Message[], rag_generation_config?: GenerationConfig | Record, - vector_search_settings?: VectorSearchSettings | Record, - kg_search_settings?: KGSearchSettings | Record, + vector_search_settings?: ChunkSearchSettings | Record, + graph_search_settings?: KGSearchSettings | Record, task_prompt_override?: string, include_title_if_available?: boolean, conversation_id?: string, @@ -2057,7 +1999,7 @@ export class r2rClient { const json_data: Record = { messages, vector_search_settings, - kg_search_settings, + graph_search_settings, rag_generation_config, task_prompt_override, include_title_if_available, diff --git a/js/sdk/src/types.ts b/js/sdk/src/types.ts new file mode 100644 index 000000000..49493060a --- /dev/null +++ b/js/sdk/src/types.ts @@ -0,0 +1,333 @@ +export interface UnprocessedChunk { + id: string; + document_id?: string; + collection_ids: string[]; + metadata: Record; + text: string; +} + +// Response wrappers +export interface ResultsWrapper { + results: T; +} + +export interface PaginatedResultsWrapper extends ResultsWrapper { + total_entries: number; +} + +// Generic response types +export interface GenericBooleanResponse { + success: boolean; +} + +export interface GenericMessageResponse { + message: string; +} + +// Chunk types +export interface ChunkResponse { + id: string; + document_id: string; + user_id: string; + collection_ids: string[]; + text: string; + metadata: Record; + vector?: any; +} + +// Collection types +export interface CollectionResponse { + id: string; + user_id?: string; + name: string; + description?: string; + created_at: string; + updated_at: string; + user_count: number; + document_count: number; +} + +// Community types +export interface CommunityResponse { + id: string; + name: string; + summary: string; + findings: string[]; + communityId?: string; + graphId?: string; + collectionId?: string; + rating?: number; + ratingExplanation?: string; + descriptionEmbedding?: string; +} + +// Conversation types +export interface ConversationResponse { + id: string; + created_at: string; + user_id?: string; + name?: string; +} + +export interface MessageResponse { + id: string; + message: any; + metadata: Record; +} + +export interface BranchResponse { + branch_id: string; + branch_point_id?: string; + content?: string; + created_at: string; + user_id?: string; + name?: string; +} + +// Document types +export interface DocumentResponse { + id: string; + collection_ids: string[]; + user_id: string; + document_type: string; + metadata: Record; + title?: string; + version: string; + size_in_bytes?: number; + ingestion_status: string; + kg_extraction_status: string; + created_at: string; + updated_at: string; + ingestion_attempt_number?: number; + summary?: string; + summary_embedding?: string; +} + +// Entity types +export interface EntityResponse { + id: string; + sid?: string; + name: string; + category?: string; + description?: string; + chunk_ids: string[]; + description_embedding?: string; + document_id: string; + document_ids: string[]; + graph_ids: string[]; + user_id: string; + last_modified_by: string; + created_at: string; + updated_at: string; + attributes?: Record; +} + +// Graph types +export interface GraphResponse { + id: string; + user_id: string; + name: string; + description: string; + status: string; + created_at: string; + updated_at: string; +} + +// Ingestion types +export 
interface IngestionResponse { + message: string; + task_id?: string; + document_id: string; +} + +export interface UpdateResponse { + message: string; + task_id?: string; + document_id: string; +} + +export interface IndexConfig { + name?: string; + table_name?: string; + index_method?: string; + index_measure?: string; + index_arguments?: string; + index_name?: string; + index_column?: string; + concurrently?: boolean; +} + +// Prompt types +export interface PromptResponse { + id: string; + name: string; + template: string; + created_at: string; + updated_at: string; + input_types: string[]; +} + +// Relationship types +export interface RelationshipResponse { + id: string; + subject: string; + predicate: string; + object: string; + description?: string; + subject_id: string; + object_id: string; + weight: number; + chunk_ids: string[]; + parent_id: string; + metadata: Record; +} + +// Retrieval types +export interface VectorSearchResult { + chunk_id: string; + document_id: string; + user_id: string; + collection_ids: string[]; + score: number; + text: string; + metadata?: Record; +} + +export interface KGSearchResult { + method: string; + content: string; + result_type?: string; + chunks_ids?: string[]; + metadata?: Record; +} + +export interface CombinedSearchResponse { + vector_search_results: VectorSearchResult[]; + graph_search_results?: KGSearchResult[]; +} + +// System types +export interface LogsResponse { + run_id: string; + run_type: string; + entries: Record[]; + timestamp?: string; + user_id?: string; +} + +export interface ServerStats { + start_time: string; + uptime_seconds: number; + cpu_usage: number; + memory_usage: number; +} + +export interface SettingsResponse { + config: Record; + prompts: Record; + r2r_project_name: string; +} + +// User types + +export interface TokenResponse { + access_token: string; + refresh_token: string; +} + +export interface User { + id: string; + email: string; + is_active: boolean; + is_superuser: boolean; + created_at: string; + updated_at: string; + is_verified: boolean; + collection_ids: string[]; + hashed_password?: string; + verification_code_expiry?: string; + name?: string; + bio?: string; + profile_picture?: string; +} + +// Generic Responses +export type WrappedBooleanResponse = ResultsWrapper; +export type WrappedGenericMessageResponse = + ResultsWrapper; + +// Chunk Responses +export type WrappedChunkResponse = ResultsWrapper; +export type WrappedChunksResponse = PaginatedResultsWrapper; + +// Collection Responses +export type WrappedCollectionResponse = ResultsWrapper; +export type WrappedCollectionsResponse = PaginatedResultsWrapper< + CollectionResponse[] +>; + +// Community Responses +export type WrappedCommunityResponse = ResultsWrapper; +export type WrappedCommunitiesResponse = PaginatedResultsWrapper< + CommunityResponse[] +>; + +// Conversation Responses +export type WrappedConversationMessagesResponse = ResultsWrapper< + MessageResponse[] +>; +export type WrappedConversationResponse = + PaginatedResultsWrapper; +export type WrappedConversationsResponse = PaginatedResultsWrapper< + ConversationResponse[] +>; +export type WrappedMessageResponse = ResultsWrapper; +export type WrappedMessagesResponse = PaginatedResultsWrapper< + MessageResponse[] +>; +export type WrappedBranchResponse = ResultsWrapper; +export type WrappedBranchesResponse = PaginatedResultsWrapper; + +// Document Responses +export type WrappedDocumentResponse = ResultsWrapper; +export type WrappedDocumentsResponse = PaginatedResultsWrapper< + 
DocumentResponse[] +>; + +// Entity Responses +export type WrappedEntityResponse = ResultsWrapper; +export type WrappedEntitiesResponse = PaginatedResultsWrapper; + +// Graph Responses +export type WrappedGraphResponse = ResultsWrapper; +export type WrappedGraphsResponse = PaginatedResultsWrapper; + +// Ingestion Responses +export type WrappedIngestionResponse = ResultsWrapper; +export type WrappedMetadataUpdateResponse = ResultsWrapper; +export type WrappedUpdateResponse = ResultsWrapper; +export type WrappedListVectorIndicesResponse = ResultsWrapper; + +// Prompt Responses +export type WrappedPromptResponse = ResultsWrapper; +export type WrappedPromptsResponse = PaginatedResultsWrapper; + +// Relationship Responses +export type WrappedRelationshipResponse = ResultsWrapper; +export type WrappedRelationshipsResponse = PaginatedResultsWrapper< + RelationshipResponse[] +>; + +// Retrieval Responses +export type WrappedVectorSearchResponse = ResultsWrapper; +export type WrappedSearchResponse = ResultsWrapper; + +// System Responses +export type WrappedSettingsResponse = ResultsWrapper; +export type WrappedLogsResponse = PaginatedResultsWrapper; +export type WrappedServerStatsResponse = ResultsWrapper; + +// User Responses +export type WrappedTokenResponse = ResultsWrapper; +export type WrappedUserResponse = ResultsWrapper; +export type WrappedUsersResponse = PaginatedResultsWrapper; diff --git a/js/sdk/src/v3/clients/chunks.ts b/js/sdk/src/v3/clients/chunks.ts new file mode 100644 index 000000000..a2ce30ada --- /dev/null +++ b/js/sdk/src/v3/clients/chunks.ts @@ -0,0 +1,107 @@ +import { feature } from "../../feature"; +import { r2rClient } from "../../r2rClient"; +import { + UnprocessedChunk, + WrappedBooleanResponse, + WrappedChunkResponse, + WrappedChunksResponse, +} from "../../types"; + +export class ChunksClient { + constructor(private client: r2rClient) {} + + /** + * Create multiple chunks. + * @param chunks List of UnprocessedChunk objects containing: + - id: Optional UUID + - document_id: Optional UUID + - collection_ids: list UUID + - metadata: dict + - text: string + * @param runWithOrchestration Optional flag to run with orchestration + * @returns + */ + @feature("chunks.create") + async create(options: { + chunks: UnprocessedChunk[]; + runWithOrchestration?: boolean; + }): Promise { + return this.client.makeRequest("POST", "chunks", { + data: { + raw_chunks: options.chunks, + runWithOrchestration: options.runWithOrchestration, + }, + }); + } + + /** + * Update an existing chunk. + * @param id ID of the chunk to update + * @param text Optional new text for the chunk + * @param metadata Optional new metadata for the chunk + * @returns + */ + @feature("chunks.update") + async update(options: { + id: string; + text?: string; + metadata?: any; + }): Promise { + return this.client.makeRequest("POST", `chunks/${options.id}`, { + data: options, + }); + } + + /** + * Get a specific chunk. + * @param id ID of the chunk to retrieve + * @returns + */ + @feature("chunks.retrieve") + async retrieve(options: { id: string }): Promise { + return this.client.makeRequest("GET", `chunks/${options.id}`); + } + + /** + * Delete a specific chunk. + * @param id ID of the chunk to delete + * @returns + */ + @feature("chunks.delete") + async delete(options: { id: string }): Promise { + return this.client.makeRequest("DELETE", `chunks/${options.id}`); + } + + /** + * List chunks. + * @param includeVectors Include vector data in response. Defaults to False. + * @param metadataFilters Filter by metadata. 
Defaults to None. + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("chunks.list") + async list(options?: { + includeVectors?: boolean; + metadataFilters?: Record; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + if (options?.includeVectors) { + params.include_vectors = options.includeVectors; + } + + if (options?.metadataFilters) { + params.metadata_filters = options.metadataFilters; + } + + return this.client.makeRequest("GET", "chunks", { + params, + }); + } +} diff --git a/js/sdk/src/v3/clients/collections.ts b/js/sdk/src/v3/clients/collections.ts new file mode 100644 index 000000000..c77b1eda5 --- /dev/null +++ b/js/sdk/src/v3/clients/collections.ts @@ -0,0 +1,263 @@ +import { feature } from "../../feature"; +import { r2rClient } from "../../r2rClient"; +import { + WrappedBooleanResponse, + WrappedGenericMessageResponse, + WrappedCollectionResponse, + WrappedCollectionsResponse, + WrappedDocumentsResponse, + WrappedUsersResponse, +} from "../../types"; + +export class CollectionsClient { + constructor(private client: r2rClient) {} + + /** + * Create a new collection. + * @param name Name of the collection + * @param description Optional description of the collection + * @returns A promise that resolves with the created collection + */ + @feature("collections.create") + async create(options: { + name: string; + description?: string; + }): Promise { + return this.client.makeRequest("POST", "collections", { + data: options, + }); + } + + /** + * List collections with pagination and filtering options. + * @param ids Optional list of collection IDs to filter by + * @param offset Optional offset for pagination + * @param limit Optional limit for pagination + * @returns + */ + @feature("collections.list") + async list(options?: { + ids?: string[]; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + if (options?.ids && options.ids.length > 0) { + params.ids = options.ids; + } + + return this.client.makeRequest("GET", "collections", { + params, + }); + } + + /** + * Get detailed information about a specific collection. + * @param id Collection ID to retrieve + * @returns + */ + @feature("collections.retrieve") + async retrieve(options: { id: string }): Promise { + return this.client.makeRequest("GET", `collections/${options.id}`); + } + + /** + * Update an existing collection. + * @param id Collection ID to update + * @param name Optional new name for the collection + * @param description Optional new description for the collection + * @returns + */ + @feature("collections.update") + async update(options: { + id: string; + name?: string; + description?: string; + }): Promise { + const data = { + ...(options.name && { name: options.name }), + ...(options.description && { description: options.description }), + }; + + return this.client.makeRequest("POST", `collections/${options.id}`, { + data, + }); + } + + /** + * Delete a collection. + * @param id Collection ID to delete + * @returns + */ + @feature("collections.delete") + async delete(options: { id: string }): Promise { + return this.client.makeRequest("DELETE", `collections/${options.id}`); + } + + /** + * List all documents in a collection. 
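+ *
+ * @example
+ * // Illustrative sketch only: assumes `client.collections` is a CollectionsClient
+ * // on a configured r2rClient, and that `collectionId` holds an existing ID.
+ * const docs = await client.collections.listDocuments({ id: collectionId, limit: 10 });
+ * console.log(docs.results.length, "of", docs.total_entries, "documents");
+ *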
+ * @param id Collection ID + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("collections.listDocuments") + async listDocuments(options: { + id: string; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + return this.client.makeRequest( + "GET", + `collections/${options.id}/documents`, + { + params, + }, + ); + } + + /** + * Add a document to a collection. + * @param id Collection ID + * @param documentId Document ID to add + * @returns + */ + @feature("collections.addDocument") + async addDocument(options: { + id: string; + documentId: string; + }): Promise { + return this.client.makeRequest( + "POST", + `collections/${options.id}/documents/${options.documentId}`, + ); + } + + /** + * Remove a document from a collection. + * @param id Collection ID + * @param documentId Document ID to remove + * @returns + */ + @feature("collections.removeDocument") + async removeDocument(options: { + id: string; + documentId: string; + }): Promise { + return this.client.makeRequest( + "DELETE", + `collections/${options.id}/documents/${options.documentId}`, + ); + } + + /** + * List all users in a collection. + * @param id Collection ID + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("collections.listUsers") + async listUsers(options: { + id: string; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + return this.client.makeRequest("GET", `collections/${options.id}/users`, { + params, + }); + } + + /** + * Add a user to a collection. + * @param id Collection ID + * @param userId User ID to add + * @returns + */ + @feature("collections.addUser") + async addUser(options: { + id: string; + userId: string; + }): Promise { + return this.client.makeRequest( + "POST", + `collections/${options.id}/users/${options.userId}`, + ); + } + + /** + * Remove a user from a collection. + * @param id Collection ID + * @param userId User ID to remove + * @returns + */ + @feature("collections.removeUser") + async removeUser(options: { + id: string; + userId: string; + }): Promise { + return this.client.makeRequest( + "DELETE", + `collections/${options.id}/users/${options.userId}`, + ); + } + + /** + * Creates communities in the graph by analyzing entity relationships and similarities. + * + * Communities are created through the following process: + * 1. Analyzes entity relationships and metadata to build a similarity graph + * 2. Applies advanced community detection algorithms (e.g. Leiden) to identify densely connected groups + * 3. Creates hierarchical community structure with multiple granularity levels + * 4. 
Generates natural language summaries and statistical insights for each community + * + * The resulting communities can be used to: + * - Understand high-level graph structure and organization + * - Identify key entity groupings and their relationships + * - Navigate and explore the graph at different levels of detail + * - Generate insights about entity clusters and their characteristics + * + * The community detection process is configurable through settings like: + * - Community detection algorithm parameters + * - Summary generation prompt + * @param collectionId The collection ID corresponding to the graph + * @returns + */ + @feature("collections.extract") + async extract(options: { + collectionId: string; + runType?: string; + settings?: Record; + runWithOrchestration?: boolean; + }): Promise { + const data = { + ...(options.settings && { settings: options.settings }), + ...(options.runType && { run_type: options.runType }), + ...(options.runWithOrchestration && { + run_with_orchestration: options.runWithOrchestration, + }), + }; + + return this.client.makeRequest( + "POST", + `collections/${options.collectionId}/extract`, + { + data, + }, + ); + } +} diff --git a/js/sdk/src/v3/clients/conversations.ts b/js/sdk/src/v3/clients/conversations.ts new file mode 100644 index 000000000..796c1edc7 --- /dev/null +++ b/js/sdk/src/v3/clients/conversations.ts @@ -0,0 +1,164 @@ +import { feature } from "../../feature"; +import { r2rClient } from "../../r2rClient"; +import { + WrappedBooleanResponse, + WrappedBranchesResponse, + WrappedConversationMessagesResponse, + WrappedConversationResponse, + WrappedConversationsResponse, + WrappedMessageResponse, +} from "../../types"; + +export class ConversationsClient { + constructor(private client: r2rClient) {} + + /** + * Create a new conversation. + * @returns + */ + @feature("conversations.create") + async create(): Promise { + return this.client.makeRequest("POST", "conversations"); + } + + /** + * List conversations with pagination and sorting options. + * @param ids List of conversation IDs to retrieve + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("conversations.list") + async list(options?: { + ids?: string[]; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + if (options?.ids && options.ids.length > 0) { + params.ids = options.ids; + } + + return this.client.makeRequest("GET", "conversations", { + params, + }); + } + + /** + * Get detailed information about a specific conversation. + * @param id The ID of the conversation to retrieve + * @param branchID The ID of the branch to retrieve + * @returns + */ + @feature("conversations.retrieve") + async retrieve(options: { + id: string; + branchID?: string; + }): Promise { + const params: Record = { + branchID: options.branchID, + }; + + return this.client.makeRequest("GET", `conversations/${options.id}`, { + params, + }); + } + + /** + * Delete a conversation. + * @param id The ID of the conversation to delete + * @returns + */ + @feature("conversations.delete") + async delete(options: { id: string }): Promise { + return this.client.makeRequest("DELETE", `conversations/${options.id}`); + } + + /** + * Add a new message to a conversation. 
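+ *
+ * @example
+ * // Minimal sketch (assumes `client.conversations` is a ConversationsClient and
+ * // `conversationId` refers to an existing conversation):
+ * const msg = await client.conversations.addMessage({
+ *   id: conversationId,
+ *   role: "user",
+ *   content: "What does the report conclude?",
+ * });
+ *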
+ * @param id The ID of the conversation to add the message to + * @param content The content of the message + * @param role The role of the message (e.g., "user" or "assistant") + * @param parentID The ID of the parent message + * @param metadata Additional metadata to attach to the message + * @returns + */ + @feature("conversations.addMessage") + async addMessage(options: { + id: string; + content: string; + role: string; + parentID?: string; + metadata?: Record; + }): Promise { + const data: Record = { + content: options.content, + role: options.role, + ...(options.parentID && { parentID: options.parentID }), + ...(options.metadata && { metadata: options.metadata }), + }; + + return this.client.makeRequest( + "POST", + `conversations/${options.id}/messages`, + { + data, + }, + ); + } + + /** + * Update an existing message in a conversation. + * @param id The ID of the conversation containing the message + * @param messageID The ID of the message to update + * @param content The new content of the message + * @returns + */ + @feature("conversations.updateMessage") + async updateMessage(options: { + id: string; + messageID: string; + content: string; + }): Promise { + const data: Record = { + content: options.content, + }; + + return this.client.makeRequest( + "POST", + `conversations/${options.id}/messages/${options.messageID}`, + { + data, + }, + ); + } + + /** + * List all branches in a conversation. + * @param id The ID of the conversation to list branches for + * @returns + */ + @feature("conversations.listBranches") + async listBranches(options: { + id: string; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + return this.client.makeRequest( + "GET", + `conversations/${options.id}/branches`, + { + params, + }, + ); + } +} diff --git a/js/sdk/src/v3/clients/documents.ts b/js/sdk/src/v3/clients/documents.ts new file mode 100644 index 000000000..ffc1171e4 --- /dev/null +++ b/js/sdk/src/v3/clients/documents.ts @@ -0,0 +1,375 @@ +import { r2rClient } from "../../r2rClient"; +import FormData from "form-data"; +import { + WrappedBooleanResponse, + WrappedChunksResponse, + WrappedCollectionsResponse, + WrappedDocumentResponse, + WrappedDocumentsResponse, + WrappedEntitiesResponse, + WrappedIngestionResponse, + WrappedRelationshipsResponse, +} from "../../types"; +import { feature } from "../../feature"; + +let fs: any; +if (typeof window === "undefined") { + import("fs").then((module) => { + fs = module; + }); +} + +type FileInput = string | File | { path: string; name: string }; + +export class DocumentsClient { + constructor(private client: r2rClient) {} + + /** + * Create a new document from either a file or content. + * @param file The file to upload, if any + * @param raw_text Optional raw text content to upload, if no file path is provided + * @param chunks Optional array of pre-processed text chunks to ingest + * @param id Optional ID to assign to the document + * @param collectionIds Collection IDs to associate with the document. If none are provided, the document will be assigned to the user's default collection. 
+ * @param metadata Optional metadata to assign to the document + * @param ingestionConfig Optional ingestion configuration to use + * @param runWithOrchestration Optional flag to run with orchestration + * @returns + */ + @feature("documents.create") + async create(options: { + file?: FileInput; + raw_text?: string; + chunks?: string[]; + id?: string; + metadata?: Record; + ingestionConfig?: Record; + collectionIds?: string[]; + runWithOrchestration?: boolean; + }): Promise { + const inputCount = [options.file, options.raw_text, options.chunks].filter( + (x) => x !== undefined, + ).length; + if (inputCount === 0) { + throw new Error("Either file, raw_text, or chunks must be provided"); + } + if (inputCount > 1) { + throw new Error("Only one of file, raw_text, or chunks may be provided"); + } + + const formData = new FormData(); + const processedFiles: string[] = []; + + const processPath = async (path: FileInput): Promise => { + const appendFile = ( + file: File | NodeJS.ReadableStream, + filename: string, + ) => { + formData.append(`file`, file, filename); + processedFiles.push(filename); + }; + + if (typeof path === "string") { + if (typeof window === "undefined") { + const stat = await fs.promises.stat(path); + if (stat.isDirectory()) { + throw new Error("Directories are not supported in create()"); + } else { + appendFile(fs.createReadStream(path), path.split("/").pop() || ""); + } + } else { + console.warn( + "File or folder path provided in browser environment. This is not supported.", + ); + } + } else if (path instanceof File) { + appendFile(path, path.name); + } else if ("path" in path && "name" in path) { + if (typeof window === "undefined") { + appendFile(fs.createReadStream(path.path), path.name); + } else { + console.warn( + "File path provided in browser environment. This is not supported.", + ); + } + } + }; + + if (options.file) { + await processPath(options.file); + } + + if (options.raw_text) { + formData.append("raw_text", options.raw_text); + } + if (options.chunks) { + formData.append("chunks", JSON.stringify(options.chunks)); + } + if (options.id) { + formData.append("id", options.id); + } + if (options.metadata) { + formData.append("metadata", JSON.stringify(options.metadata)); + } + if (options.ingestionConfig) { + formData.append( + "ingestion_config", + JSON.stringify(options.ingestionConfig), + ); + } + if (options.collectionIds) { + options.collectionIds.forEach((id) => { + formData.append("collection_ids", id); + }); + } + if (options.runWithOrchestration !== undefined) { + formData.append( + "run_with_orchestration", + String(options.runWithOrchestration), + ); + } + + formData.append("file_names", JSON.stringify(processedFiles)); + + return this.client.makeRequest("POST", "documents", { + data: formData, + headers: formData.getHeaders?.() ?? { + "Content-Type": "multipart/form-data", + }, + transformRequest: [ + (data: any, headers: Record) => { + delete headers["Content-Type"]; + return data; + }, + ], + }); + } + + /** + * Get a specific document by ID. + * @param ids Optional list of document IDs to filter by + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("documents.retrieve") + async retrieve(options: { id: string }): Promise { + return this.client.makeRequest("GET", `documents/${options.id}`); + } + + /** + * List documents with pagination. 
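+ *
+ * @example
+ * // Hedged usage sketch; `client.documents` is assumed to be a DocumentsClient
+ * // on an authenticated r2rClient instance.
+ * const page = await client.documents.list({ offset: 0, limit: 25 });
+ * for (const doc of page.results) {
+ *   console.log(doc.id, doc.ingestion_status);
+ * }
+ *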
+ * @param ids Optional list of document IDs to filter by + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("documents.list") + async list(options?: { + ids?: string[]; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + if (options?.ids) { + params.ids = options.ids; + } + + return this.client.makeRequest("GET", "documents", { + params, + }); + } + + /** + * Download a document's file content. + * @param id ID of document to download + * @returns + */ + @feature("documents.download") + async download(options: { id: string }): Promise { + return this.client.makeRequest("GET", `documents/${options.id}/download`, { + responseType: "blob", + }); + } + + /** + * Delete a specific document. + * @param id ID of document to delete + * @returns + */ + @feature("documents.delete") + async delete(options: { id: string }): Promise { + return this.client.makeRequest("DELETE", `documents/${options.id}`); + } + + /** + * Get chunks for a specific document. + * @param id Document ID to retrieve chunks for + * @param includeVectors Whether to include vectors in the response + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("documents.listChunks") + async listChunks(options: { + id: string; + includeVectors?: boolean; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + includeVectors: options.includeVectors ?? false, + offset: options.offset ?? 0, + limit: options.limit ?? 100, + }; + + return this.client.makeRequest("GET", `documents/${options.id}/chunks`, { + params, + }); + } + + /** + * List collections for a specific document. + * @param id ID of document to retrieve collections for + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("documents.listCollections") + async listCollections(options: { + id: string; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options.offset ?? 0, + limit: options.limit ?? 100, + }; + + return this.client.makeRequest( + "GET", + `documents/${options.id}/collections`, + { + params, + }, + ); + } + + @feature("documents.deleteByFilter") + async deleteByFilter(options: { + filters: Record; + }): Promise { + return this.client.makeRequest("DELETE", "documents/by-filter", { + data: options.filters, + }); + } + + /** + * Extracts entities and relationships from a document. + * + * The entities and relationships extraction process involves: + * 1. Parsing documents into semantic chunks + * 2. 
Extracting entities and relationships using LLMs + * @param options + * @returns + */ + @feature("documents.extract") + async extract(options: { + id: string; + runType?: string; + runWithOrchestration?: boolean; + }): Promise { + const data: Record = {}; + + if (options.runType) { + data.runType = options.runType; + } + if (options.runWithOrchestration !== undefined) { + data.runWithOrchestration = options.runWithOrchestration; + } + + return this.client.makeRequest("POST", `documents/${options.id}/extract`, { + data, + }); + } + + /** + * Retrieves the entities that were extracted from a document. These + * represent important semantic elements like people, places, + * organizations, concepts, etc. + * + * Users can only access entities from documents they own or have access + * to through collections. Entity embeddings are only included if + * specifically requested. + * + * Results are returned in the order they were extracted from the document. + * @param id Document ID to retrieve entities for + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @param includeEmbeddings Whether to include vector embeddings in the response. + * @returns + */ + @feature("documents.listEntities") + async listEntities(options: { + id: string; + offset?: number; + limit?: number; + includeVectors?: boolean; + }): Promise { + const params: Record = { + offset: options.offset ?? 0, + limit: options.limit ?? 100, + includeVectors: options.includeVectors ?? false, + }; + + return this.client.makeRequest("GET", `documents/${options.id}/entities`, { + params, + }); + } + + /** + * Retrieves the relationships between entities that were extracted from + * a document. These represent connections and interactions between + * entities found in the text. + * + * Users can only access relationships from documents they own or have + * access to through collections. Results can be filtered by entity names + * and relationship types. + * + * Results are returned in the order they were extracted from the document. + * @param id Document ID to retrieve relationships for + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @param includeEmbeddings Whether to include vector embeddings in the response. + * @param entityNames Filter relationships by specific entity names. + * @param relationshipTypes Filter relationships by specific relationship types. + * @returns WrappedRelationshipsResponse + */ + @feature("documents.listRelationships") + async listRelationships(options: { + id: string; + offset?: number; + limit?: number; + includeVectors?: boolean; + entityNames?: string[]; + relationshipTypes?: string[]; + }): Promise { + const params: Record = { + offset: options.offset ?? 0, + limit: options.limit ?? 100, + includeVectors: options.includeVectors ?? 
false, + }; + + return this.client.makeRequest( + "GET", + `documents/${options.id}/relationships`, + { + params, + }, + ); + } +} diff --git a/js/sdk/src/v3/clients/graphs.ts b/js/sdk/src/v3/clients/graphs.ts new file mode 100644 index 000000000..9b282f448 --- /dev/null +++ b/js/sdk/src/v3/clients/graphs.ts @@ -0,0 +1,602 @@ +import { feature } from "../../feature"; +import { r2rClient } from "../../r2rClient"; +import { + WrappedGraphResponse, + WrappedBooleanResponse, + WrappedGraphsResponse, + WrappedEntityResponse, + WrappedEntitiesResponse, + WrappedRelationshipsResponse, + WrappedRelationshipResponse, + WrappedCommunitiesResponse, + WrappedCommunityResponse, +} from "../../types"; + +export class GraphsClient { + constructor(private client: r2rClient) {} + + /** + * List graphs with pagination and filtering options. + * @param collectionIds Optional list of collection IDs to filter by + * @param offset Optional offset for pagination + * @param limit Optional limit for pagination + * @returns + */ + @feature("graphs.list") + async list(options?: { + collectionIds?: string[]; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + if (options?.collectionIds && options.collectionIds.length > 0) { + params.collectionIds = options.collectionIds; + } + + return this.client.makeRequest("GET", "graphs", { + params, + }); + } + + /** + * Get detailed information about a specific graph. + * @param collectionId The collection ID corresponding to the graph + * @returns + */ + @feature("graphs.retrieve") + async retrieve(options: { + collectionId: string; + }): Promise { + return this.client.makeRequest("GET", `graphs/${options.collectionId}`); + } + + /** + * Deletes a graph and all its associated data. + * + * This endpoint permanently removes the specified graph along with all + * entities and relationships that belong to only this graph. + * + * Entities and relationships extracted from documents are not deleted. + * @param collectionId The collection ID corresponding to the graph + * @returns + */ + @feature("graphs.reset") + async reset(options: { + collectionId: string; + }): Promise { + return this.client.makeRequest( + "POST", + `graphs/${options.collectionId}/reset`, + ); + } + + /** + * Update an existing graph. + * @param collectionId The collection ID corresponding to the graph + * @param name Optional new name for the graph + * @param description Optional new description for the graph + * @returns + */ + @feature("graphs.update") + async update(options: { + collectionId: string; + name?: string; + description?: string; + }): Promise { + const data = { + ...(options.name && { name: options.name }), + ...(options.description && { description: options.description }), + }; + + return this.client.makeRequest("POST", `graphs/${options.collectionId}`, { + data, + }); + } + + /** + * Creates a new entity in the graph. 
+ * @param collectionId The collection ID corresponding to the graph + * @param entity Entity to add + * @returns + */ + @feature("graphs.createEntity") + async createEntity(options: { + collectionId: string; + name: string; + description?: string; + category?: string; + metadata?: Record; + }): Promise { + const data = { + name: options.name, + ...(options.description && { description: options.description }), + ...(options.category && { category: options.category }), + ...(options.metadata && { metadata: options.metadata }), + }; + + return this.client.makeRequest( + "POST", + `graphs/${options.collectionId}/entities`, + { + data, + }, + ); + } + + /** + * List all entities in a graph. + * @param collectionId The collection ID corresponding to the graph + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("graphs.listEntities") + async listEntities(options: { + collectionId: string; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + return this.client.makeRequest( + "GET", + `graphs/${options.collectionId}/entities`, + { + params, + }, + ); + } + + /** + * Retrieve an entity from a graph. + * @param collectionId The collection ID corresponding to the graph + * @param entityId Entity ID to retrieve + * @returns + */ + @feature("graphs.getEntity") + async getEntity(options: { + collectionId: string; + entityId: string; + }): Promise { + return this.client.makeRequest( + "GET", + `graphs/${options.collectionId}/entities/${options.entityId}`, + ); + } + + /** + * Updates an existing entity in the graph. + * @param collectionId The collection ID corresponding to the graph + * @param entityId Entity ID to update + * @param entity Entity to update + * @returns + */ + @feature("graphs.updateEntity") + async updateEntity(options: { + collectionId: string; + entityId: string; + name?: string; + description?: string; + category?: string; + metadata?: Record; + }): Promise { + const data = { + ...(options.name && { name: options.name }), + ...(options.description && { description: options.description }), + ...(options.category && { category: options.category }), + ...(options.metadata && { metadata: options.metadata }), + }; + + return this.client.makeRequest( + "POST", + `graphs/${options.collectionId}/entities/${options.entityId}`, + { + data, + }, + ); + } + + /** + * Remove an entity from a graph. + * @param collectionId The collection ID corresponding to the graph + * @param entityId Entity ID to remove + * @returns + */ + @feature("graphs.removeEntity") + async removeEntity(options: { + collectionId: string; + entityId: string; + }): Promise { + return this.client.makeRequest( + "DELETE", + `graphs/${options.collectionId}/entities/${options.entityId}`, + ); + } + /** + * Creates a new relationship in the graph. 
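+ *
+ * @example
+ * // Placeholder names and IDs; assumes the subject and object entities were created first.
+ * await client.graphs.createRelationship({
+ *   collectionId,
+ *   subject: "Aristotle",
+ *   subjectId: aristotleEntityId,
+ *   predicate: "student_of",
+ *   object: "Plato",
+ *   objectId: platoEntityId,
+ *   weight: 0.9,
+ * });
+ *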
+ * @param collectionId The collection ID corresponding to the graph + * @param relationship Relationship to add + * @returns + */ + @feature("graphs.createRelationship") + async createRelationship(options: { + collectionId: string; + subject: string; + subjectId: string; + predicate: string; + object: string; + objectId: string; + description?: string; + weight?: number; + metadata?: Record; + }): Promise { + const data = { + subject: options.subject, + subject_id: options.subjectId, + predicate: options.predicate, + object: options.object, + object_id: options.objectId, + ...(options.description && { description: options.description }), + ...(options.weight && { weight: options.weight }), + ...(options.metadata && { metadata: options.metadata }), + }; + + return this.client.makeRequest( + "POST", + `graphs/${options.collectionId}/relationships`, + { + data, + }, + ); + } + + /** + * List all relationships in a graph. + * @param collectionId The collection ID corresponding to the graph + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("graphs.listRelationships") + async listRelationships(options: { + collectionId: string; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + return this.client.makeRequest( + "GET", + `graphs/${options.collectionId}/relationships`, + { + params, + }, + ); + } + + /** + * Retrieve a relationship from a graph. + * @param collectionId The collection ID corresponding to the graph + * @param relationshipId Relationship ID to retrieve + * @returns + */ + @feature("graphs.getRelationship") + async getRelationship(options: { + collectionId: string; + relationshipId: string; + }): Promise { + return this.client.makeRequest( + "GET", + `graphs/${options.collectionId}/relationships/${options.relationshipId}`, + ); + } + + /** + * Updates an existing relationship in the graph. + * @param collectionId The collection ID corresponding to the graph + * @param relationshipId Relationship ID to update + * @param relationship Relationship to update + * @returns WrappedRelationshipResponse + */ + @feature("graphs.updateRelationship") + async updateRelationship(options: { + collectionId: string; + relationshipId: string; + subject?: string; + subjectId?: string; + predicate?: string; + object?: string; + objectId?: string; + description?: string; + weight?: number; + metadata?: Record; + }): Promise { + const data = { + ...(options.subject && { subject: options.subject }), + ...(options.subjectId && { subject_id: options.subjectId }), + ...(options.predicate && { predicate: options.predicate }), + ...(options.object && { object: options.object }), + ...(options.objectId && { object_id: options.objectId }), + ...(options.description && { description: options.description }), + ...(options.weight && { weight: options.weight }), + ...(options.metadata && { metadata: options.metadata }), + }; + + return this.client.makeRequest( + "POST", + `graphs/${options.collectionId}/relationships/${options.relationshipId}`, + { + data, + }, + ); + } + + /** + * Remove a relationship from a graph. 
+ * @param collectionId The collection ID corresponding to the graph + * @param relationshipId Entity ID to remove + * @returns + */ + @feature("graphs.removeRelationship") + async removeRelationship(options: { + collectionId: string; + relationshipId: string; + }): Promise { + return this.client.makeRequest( + "DELETE", + `graphs/${options.collectionId}/relationships/${options.relationshipId}`, + ); + } + + /** + * Creates a new community in the graph. + * + * While communities are typically built automatically via the /graphs/{id}/communities/build endpoint, + * this endpoint allows you to manually create your own communities. + * + * This can be useful when you want to: + * - Define custom groupings of entities based on domain knowledge + * - Add communities that weren't detected by the automatic process + * - Create hierarchical organization structures + * - Tag groups of entities with specific metadata + * + * The created communities will be integrated with any existing automatically detected communities + * in the graph's community structure. + * + * @param collectionId The collection ID corresponding to the graph + * @param name Name of the community + * @param summary Summary of the community + * @param findings Findings or insights about the community + * @param rating Rating of the community + * @param ratingExplanation Explanation of the community rating + * @param attributes Additional attributes to associate with the community + * @returns WrappedCommunityResponse + */ + @feature("graphs.createCommunity") + async createCommunity(options: { + collectionId: string; + name: string; + summary: string; + findings?: string[]; + rating?: number; + ratingExplanation?: string; + attributes?: Record; + }): Promise { + const data = { + name: options.name, + ...(options.summary && { summary: options.summary }), + ...(options.findings && { findings: options.findings }), + ...(options.rating && { rating: options.rating }), + ...(options.ratingExplanation && { + rating_explanation: options.ratingExplanation, + }), + ...(options.attributes && { attributes: options.attributes }), + }; + + return this.client.makeRequest( + "POST", + `graphs/${options.collectionId}/communities`, + { + data, + }, + ); + } + + /** + * List all communities in a graph. + * @param collectionId The collection ID corresponding to the graph + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("graphs.listCommunities") + async listCommunities(options: { + collectionId: string; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + return this.client.makeRequest( + "GET", + `graphs/${options.collectionId}/communities`, + { + params, + }, + ); + } + + /** + * Retrieve a community from a graph. + * @param collectionId The collection ID corresponding to the graph + * @param communityId Entity ID to retrieve + * @returns + */ + @feature("graphs.getCommunity") + async getCommunity(options: { + collectionId: string; + communityId: string; + }): Promise { + return this.client.makeRequest( + "GET", + `graphs/${options.collectionId}/communities/${options.communityId}`, + ); + } + + /** + * Updates an existing community in the graph. 
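+ *
+ * @example
+ * // Hypothetical values; only the fields being changed need to be supplied.
+ * await client.graphs.updateCommunity({
+ *   collectionId,
+ *   communityId,
+ *   summary: "Philosophers of the Academy and their students",
+ *   rating: 8,
+ * });
+ *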
+ * @param collectionId The collection ID corresponding to the graph + * @param communityId Community ID to update + * @param entity Entity to update + * @returns WrappedCommunityResponse + */ + @feature("graphs.updateCommunity") + async updateCommunity(options: { + collectionId: string; + communityId: string; + name?: string; + summary?: string; + findings?: string[]; + rating?: number; + ratingExplanation?: string; + attributes?: Record; + }): Promise { + const data = { + ...(options.name && { name: options.name }), + ...(options.summary && { summary: options.summary }), + ...(options.findings && { findings: options.findings }), + ...(options.rating && { rating: options.rating }), + ...(options.ratingExplanation && { + rating_explanation: options.ratingExplanation, + }), + ...(options.attributes && { attributes: options.attributes }), + }; + return this.client.makeRequest( + "POST", + `graphs/${options.collectionId}/communities/${options.communityId}`, + { + data, + }, + ); + } + + /** + * Delete a community in a graph. + * @param collectionId The collection ID corresponding to the graph + * @param communityId Community ID to delete + * @returns + */ + @feature("graphs.deleteCommunity") + async deleteCommunity(options: { + collectionId: string; + communityId: string; + }): Promise { + return this.client.makeRequest( + "DELETE", + `graphs/${options.collectionId}/communities/${options.communityId}`, + ); + } + + /** + * Adds documents to a graph by copying their entities and relationships. + * + * This endpoint: + * 1. Copies document entities to the graphs_entities table + * 2. Copies document relationships to the graphs_relationships table + * 3. Associates the documents with the graph + * + * When a document is added: + * - Its entities and relationships are copied to graph-specific tables + * - Existing entities/relationships are updated by merging their properties + * - The document ID is recorded in the graph's document_ids array + * + * Documents added to a graph will contribute their knowledge to: + * - Graph analysis and querying + * - Community detection + * - Knowledge graph enrichment + * + * The user must have access to both the graph and the documents being added. + * @param collectionId The collection ID corresponding to the graph + * @returns + */ + @feature("graphs.pull") + async pull(options: { + collectionId: string; + }): Promise { + return this.client.makeRequest( + "POST", + `graphs/${options.collectionId}/pull`, + ); + } + + /** + * Removes a document from a graph and removes any associated entities + * + * This endpoint: + * 1. Removes the document ID from the graph's document_ids array + * 2. Optionally deletes the document's copied entities and relationships + * + * The user must have access to both the graph and the document being removed. + * @param collectionId The collection ID corresponding to the graph + * @param documentId The document ID to remove + * @returns + */ + @feature("graphs.removeDocument") + async removeDocument(options: { + collectionId: string; + documentId: string; + }): Promise { + return this.client.makeRequest( + "DELETE", + `graphs/${options.collectionId}/documents/${options.documentId}`, + ); + } + + /** + * Creates communities in the graph by analyzing entity relationships and similarities. + * + * Communities are created through the following process: + * 1. Analyzes entity relationships and metadata to build a similarity graph + * 2. Applies advanced community detection algorithms (e.g. 
Leiden) to identify densely connected groups + * 3. Creates hierarchical community structure with multiple granularity levels + * 4. Generates natural language summaries and statistical insights for each community + * + * The resulting communities can be used to: + * - Understand high-level graph structure and organization + * - Identify key entity groupings and their relationships + * - Navigate and explore the graph at different levels of detail + * - Generate insights about entity clusters and their characteristics + * + * The community detection process is configurable through settings like: + * - Community detection algorithm parameters + * - Summary generation prompt + * + * @param options + * @returns + */ + @feature("graphs.buildCommunities") + async buildCommunities(options: { + collectionId: string; + runType?: string; + kgEntichmentSettings?: Record; + runWithOrchestration?: boolean; + }): Promise { + return this.client.makeRequest( + "POST", + `graphs/${options.collectionId}/communities/build`, + ); + } +} diff --git a/js/sdk/src/v3/clients/indices.ts b/js/sdk/src/v3/clients/indices.ts new file mode 100644 index 000000000..cae88989b --- /dev/null +++ b/js/sdk/src/v3/clients/indices.ts @@ -0,0 +1,95 @@ +import { feature } from "../../feature"; +import { r2rClient } from "../../r2rClient"; +import { + IndexConfig, + WrappedGenericMessageResponse, + WrappedListVectorIndicesResponse, +} from "../../types"; + +export class IndiciesClient { + constructor(private client: r2rClient) {} + + /** + * Create a new vector similarity search index in the database. + * @param config Configuration for the vector index. + * @param runWithOrchestration Whether to run index creation as an orchestrated task. + * @returns + */ + @feature("indices.create") + async create(options: { + config: IndexConfig; + runWithOrchestration?: boolean; + }): Promise { + const data = { + config: options.config, + ...(options.runWithOrchestration && { + run_with_orchestration: options.runWithOrchestration, + }), + }; + + return this.client.makeRequest("POST", `indices`, { + data, + }); + } + + /** + * List existing vector similarity search indices with pagination support. + * @param filters Filter criteria for indices. + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("indices.list") + async list(options?: { + filters?: Record; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + if (options?.filters) { + params.filters = options.filters; + } + + return this.client.makeRequest("GET", `indices`, { + params, + }); + } + + /** + * Get detailed information about a specific vector index. + * @param indexName The name of the index to retrieve. + * @param tableName The name of the table where the index is stored. + * @returns + */ + @feature("indices.retrieve") + async retrieve(options: { + tableName: string; + indexName: string; + }): Promise { + return this.client.makeRequest( + "GET", + `indices/${options.indexName}/${options.tableName}`, + ); + } + + /** + * Delete an existing vector index. + * @param indexName The name of the index to delete. + * @param tableName The name of the table where the index is stored. 
+ * @returns + */ + @feature("indices.delete") + async delete(options: { + tableName: string; + indexName: string; + }): Promise { + return this.client.makeRequest( + "DELETE", + `indices/${options.indexName}/${options.tableName}`, + ); + } +} diff --git a/js/sdk/src/v3/clients/prompts.ts b/js/sdk/src/v3/clients/prompts.ts new file mode 100644 index 000000000..2dc54e951 --- /dev/null +++ b/js/sdk/src/v3/clients/prompts.ts @@ -0,0 +1,111 @@ +import { feature } from "../../feature"; +import { r2rClient } from "../../r2rClient"; +import { + WrappedBooleanResponse, + WrappedGenericMessageResponse, + WrappedPromptResponse, + WrappedPromptsResponse, +} from "../../types"; + +export class PromptsClient { + constructor(private client: r2rClient) {} + + /** + * Create a new prompt with the given configuration. + * + * This endpoint allows superusers to create a new prompt with a + * specified name, template, and input types. + * @param name The name of the prompt + * @param template The template string for the prompt + * @param inputTypes A dictionary mapping input names to their types + * @returns + */ + @feature("prompts.create") + async create(options: { + name: string; + template: string; + inputTypes: Record; + }): Promise { + return this.client.makeRequest("POST", "prompts", { + data: options, + }); + } + + /** + * List all available prompts. + * + * This endpoint retrieves a list of all prompts in the system. + * Only superusers can access this endpoint. + * @returns + */ + @feature("prompts.list") + async list(): Promise { + return this.client.makeRequest("GET", "prompts"); + } + + /** + * Get a specific prompt by name, optionally with inputs and override. + * + * This endpoint retrieves a specific prompt and allows for optional + * inputs and template override. + * Only superusers can access this endpoint. + * @param options + * @returns + */ + @feature("prompts.retrieve") + async retrieve(options: { + name: string; + inputs?: string[]; + promptOverride?: string; + }): Promise { + const data: Record = { + ...(options.inputs && { inputs: options.inputs }), + ...(options.promptOverride && { + promptOverride: options.promptOverride, + }), + }; + + return this.client.makeRequest("POST", `prompts/${options.name}`, { + params: data, + }); + } + + /** + * Update an existing prompt's template and/or input types. + * + * This endpoint allows superusers to update the template and input types of an existing prompt. + * @param options + * @returns + */ + @feature("prompts.update") + async update(options: { + name: string; + template?: string; + inputTypes?: Record; + }): Promise { + const params: Record = { + name: options.name, + }; + if (options.template) { + params.template = options.template; + } + if (options.inputTypes) { + params.inputTypes = options.inputTypes; + } + + return this.client.makeRequest("PUT", `prompts/${options.name}`, { + data: params, + }); + } + + /** + * Delete a prompt by name. + * + * This endpoint allows superusers to delete an existing prompt. 
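+ *
+ * @example
+ * // Assumes a superuser client; the prompt name is a placeholder.
+ * await client.prompts.delete({ name: "my_custom_prompt" });
+ *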
+ * @param name The name of the prompt to delete + * @returns + */ + async delete(options: { name: string }): Promise { + return this.client.makeRequest("DELETE", `prompts/${options.name}`); + } +} diff --git a/js/sdk/src/v3/clients/retrieval.ts b/js/sdk/src/v3/clients/retrieval.ts new file mode 100644 index 000000000..08b9f5028 --- /dev/null +++ b/js/sdk/src/v3/clients/retrieval.ts @@ -0,0 +1,278 @@ +import { r2rClient } from "../../r2rClient"; + +import { + Message, + ChunkSearchSettings, + KGSearchSettings, + GenerationConfig, +} from "../../models"; +import { feature } from "../../feature"; + +export class RetrievalClient { + constructor(private client: r2rClient) {} + + /** + * Perform a search query on the vector database and knowledge graph and + * any other configured search engines. + * + * This endpoint allows for complex filtering of search results using + * PostgreSQL-based queries. Filters can be applied to various fields + * such as document_id, and internal metadata values. + * + * Allowed operators include: `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, + * `like`, `ilike`, `in`, and `nin`. + * @param query Search query to find relevant documents + * @param VectorSearchSettings Settings for vector-based search + * @param KGSearchSettings Settings for knowledge graph search + * @returns + */ + @feature("retrieval.search") + async search(options: { + query: string; + vectorSearchSettings?: ChunkSearchSettings | Record; + kgSearchSettings?: KGSearchSettings | Record; + }): Promise { + const data = { + query: options.query, + ...(options.vectorSearchSettings && { + vectorSearchSettings: options.vectorSearchSettings, + }), + ...(options.kgSearchSettings && { + kgSearchSettings: options.kgSearchSettings, + }), + }; + + return await this.client.makeRequest("POST", "retrieval/search", { + data: data, + }); + } + + /** + * Execute a RAG (Retrieval-Augmented Generation) query. + * + * This endpoint combines search results with language model generation. + * It supports the same filtering capabilities as the search endpoint, + * allowing for precise control over the retrieved context. + * + * The generation process can be customized using the `rag_generation_config` parameter. 
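+ *
+ * @example
+ * // Rough sketch; `client.retrieval` is assumed to be a RetrievalClient, and the
+ * // generation settings shown are illustrative rather than exhaustive.
+ * const response = await client.retrieval.rag({
+ *   query: "What were the key findings of the 2023 audit?",
+ *   ragGenerationConfig: { stream: false },
+ * });
+ *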
+ * @param query + * @param ragGenerationConfig Configuration for RAG generation + * @param vectorSearchSettings Settings for vector-based search + * @param kgSearchSettings Settings for knowledge graph search + * @param taskPromptOverride Optional custom prompt to override default + * @param includeTitleIfAvailable Include document titles in responses when available + * @returns + */ + @feature("retrieval.rag") + async rag(options: { + query: string; + ragGenerationConfig?: GenerationConfig | Record; + vectorSearchSettings?: ChunkSearchSettings | Record; + kgSearchSettings?: KGSearchSettings | Record; + taskPromptOverride?: string; + includeTitleIfAvailable?: boolean; + }): Promise> { + const data = { + query: options.query, + ...(options.vectorSearchSettings && { + vectorSearchSettings: options.vectorSearchSettings, + }), + ...(options.ragGenerationConfig && { + ragGenerationConfig: options.ragGenerationConfig, + }), + ...(options.kgSearchSettings && { + kgSearchSettings: options.kgSearchSettings, + }), + ...(options.taskPromptOverride && { + taskPromptOverride: options.taskPromptOverride, + }), + ...(options.includeTitleIfAvailable && { + includeTitleIfAvailable: options.includeTitleIfAvailable, + }), + }; + + if (options.ragGenerationConfig && options.ragGenerationConfig.stream) { + return this.streamRag(data); + } else { + return await this.client.makeRequest("POST", "retrieval/rag", { + data: data, + }); + } + } + + @feature("retrieval.streamRag") + private async streamRag( + ragData: Record, + ): Promise> { + return this.client.makeRequest>( + "POST", + "retrieval/rag", + { + data: ragData, + headers: { + "Content-Type": "application/json", + }, + responseType: "stream", + }, + ); + } + + /** + * Engage with an intelligent RAG-powered conversational agent for complex + * information retrieval and analysis. + * + * This advanced endpoint combines retrieval-augmented generation (RAG) + * with a conversational AI agent to provide detailed, context-aware + * responses based on your document collection. + * + * The agent can: + * - Maintain conversation context across multiple interactions + * - Dynamically search and retrieve relevant information from both + * vector and knowledge graph sources + * - Break down complex queries into sub-questions for comprehensive + * answers + * - Cite sources and provide evidence-based responses + * - Handle follow-up questions and clarifications + * - Navigate complex topics with multi-step reasoning + * + * Key Features: + * - Hybrid search combining vector and knowledge graph approaches + * - Contextual conversation management with conversation_id tracking + * - Customizable generation parameters for response style and length + * - Source document citation with optional title inclusion + * - Streaming support for real-time responses + * - Branch management for exploring different conversation paths + * + * Common Use Cases: + * - Research assistance and literature review + * - Document analysis and summarization + * - Technical support and troubleshooting + * - Educational Q&A and tutoring + * - Knowledge base exploration + * + * The agent uses both vector search and knowledge graph capabilities to + * find and synthesize information, providing detailed, factual responses + * with proper attribution to source documents. 
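+ *
+ * @example
+ * // Hedged sketch: the Message shape (role/content) and the use of an existing
+ * // `conversationId` are assumptions based on the surrounding docs.
+ * const reply = await client.retrieval.agent({
+ *   message: { role: "user", content: "Summarize the main risks in my documents." },
+ *   conversationId,
+ * });
+ *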
+ * @param message Current message to process + * @param ragGenerationConfig Configuration for RAG generation + * @param vectorSearchSettings Settings for vector-based search + * @param kgSearchSettings Settings for knowledge graph search + * @param taskPromptOverride Optional custom prompt to override default + * @param includeTitleIfAvailable Include document titles in responses when available + * @param conversationId ID of the conversation + * @param branchId ID of the conversation branch + * @returns + */ + @feature("retrieval.agent") + async agent(options: { + message: Message; + ragGenerationConfig?: GenerationConfig | Record; + vectorSearchSettings?: ChunkSearchSettings | Record; + kgSearchSettings?: KGSearchSettings | Record; + taskPromptOverride?: string; + includeTitleIfAvailable?: boolean; + conversationId?: string; + branchId?: string; + }): Promise> { + const data: Record = { + message: options.message, + ...(options.vectorSearchSettings && { + vectorSearchSettings: options.vectorSearchSettings, + }), + ...(options.kgSearchSettings && { + kgSearchSettings: options.kgSearchSettings, + }), + ...(options.ragGenerationConfig && { + ragGenerationConfig: options.ragGenerationConfig, + }), + ...(options.taskPromptOverride && { + taskPromptOverride: options.taskPromptOverride, + }), + ...(options.includeTitleIfAvailable && { + includeTitleIfAvailable: options.includeTitleIfAvailable, + }), + ...(options.conversationId && { + conversationId: options.conversationId, + }), + ...(options.branchId && { + branchId: options.branchId, + }), + }; + + if (options.ragGenerationConfig && options.ragGenerationConfig.stream) { + return this.streamAgent(data); + } else { + return await this.client.makeRequest("POST", "retrieval/agent", { + data: data, + }); + } + } + + @feature("retrieval.streamAgent") + private async streamAgent( + agentData: Record, + ): Promise> { + return this.client.makeRequest>( + "POST", + "retrieval/agent", + { + data: agentData, + headers: { + "Content-Type": "application/json", + }, + responseType: "stream", + }, + ); + } + + /** + * Generate completions for a list of messages. + * + * This endpoint uses the language model to generate completions for + * the provided messages. The generation process can be customized using + * the generation_config parameter. + * + * The messages list should contain alternating user and assistant + * messages, with an optional system message at the start. Each message + * should have a 'role' and 'content'. 
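+ *
+ * @example
+ * // Minimal sketch with placeholder messages; assumes `client.retrieval` is a
+ * // RetrievalClient on a configured r2rClient.
+ * await client.retrieval.completion({
+ *   messages: [
+ *     { role: "system", content: "You are a helpful assistant." },
+ *     { role: "user", content: "What is R2R?" },
+ *   ],
+ * });
+ *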
+ * @param messages List of messages to generate completion for + * @returns + */ + @feature("retrieval.completion") + async completion(options: { + messages: Message[]; + generationConfig?: GenerationConfig | Record; + }): Promise> { + const data = { + messages: options.messages, + ...(options.generationConfig && { + generationConfig: options.generationConfig, + }), + }; + + if (options.generationConfig && options.generationConfig.stream) { + return this.streamCompletion(data); + } else { + return await this.client.makeRequest("POST", "retrieval/completion", { + data: data, + }); + } + } + + @feature("retrieval.streamCompletion") + private async streamCompletion( + ragData: Record, + ): Promise> { + return this.client.makeRequest>( + "POST", + "retrieval/completion", + { + data: ragData, + headers: { + "Content-Type": "application/json", + }, + responseType: "stream", + }, + ); + } +} diff --git a/js/sdk/src/v3/clients/system.ts b/js/sdk/src/v3/clients/system.ts new file mode 100644 index 000000000..7fdf30773 --- /dev/null +++ b/js/sdk/src/v3/clients/system.ts @@ -0,0 +1,62 @@ +import { feature } from "../../feature"; +import { r2rClient } from "../../r2rClient"; +import { + WrappedGenericMessageResponse, + WrappedLogsResponse, + WrappedServerStatsResponse, + WrappedSettingsResponse, +} from "../../types"; + +export class SystemClient { + constructor(private client: r2rClient) {} + + /** + * Check the health of the R2R server. + */ + @feature("system.health") + async health(): Promise { + return await this.client.makeRequest("GET", "health"); + } + + /** + * Get logs from the server. + * @param options + * @returns + */ + @feature("system.logs") + async logs(options: { + runTypeFilter?: string; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options.offset ?? 0, + limit: options.limit ?? 100, + }; + + if (options.runTypeFilter) { + params.runTypeFilter = options.runTypeFilter; + } + + return this.client.makeRequest("GET", "system/logs", { params }); + } + + /** + * Get the configuration settings for the R2R server. + * @returns + */ + @feature("system.settings") + async settings(): Promise { + return await this.client.makeRequest("GET", "system/settings"); + } + + /** + * Get statistics about the server, including the start time, uptime, + * CPU usage, and memory usage. + * @returns + */ + @feature("system.status") + async status(): Promise { + return await this.client.makeRequest("GET", "system/status"); + } +} diff --git a/js/sdk/src/v3/clients/users.ts b/js/sdk/src/v3/clients/users.ts new file mode 100644 index 000000000..eadd37a7a --- /dev/null +++ b/js/sdk/src/v3/clients/users.ts @@ -0,0 +1,356 @@ +import { feature } from "../../feature"; +import { r2rClient } from "../../r2rClient"; +import { + WrappedBooleanResponse, + WrappedGenericMessageResponse, + WrappedCollectionsResponse, + WrappedTokenResponse, + WrappedUserResponse, + WrappedUsersResponse, +} from "../../types"; + +export class UsersClient { + constructor(private client: r2rClient) {} + + /** + * Register a new user. + * @param email User's email address + * @param password User's password + * @returns + */ + @feature("users.register") + async register(options: { + email: string; + password: string; + }): Promise { + return this.client.makeRequest("POST", "users/register", { + data: options, + }); + } + + /** + * Delete a specific user. + * Users can only delete their own account unless they are superusers. 
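The following sketch exercises the new `SystemClient` added above (health, settings, status, and paginated logs). It assumes the client is mounted as `client.system` on the top-level `r2rClient`; the base URL and log limit are illustrative.

```typescript
import { r2rClient } from "r2r-js";

const client = new r2rClient("http://localhost:7272");

async function inspectServer(): Promise<void> {
  // Liveness check against the health endpoint.
  console.log(await client.system.health());

  // Server configuration, plus runtime statistics (start time, uptime, CPU, memory).
  console.log(await client.system.settings());
  console.log(await client.system.status());

  // Logs are paginated; offset and limit default to 0 and 100 in the client above.
  const logs = await client.system.logs({ limit: 10 });
  console.log(logs);
}

inspectServer().catch(console.error);
```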
+ * @param id User ID to delete + * @param password User's password + * @returns + */ + @feature("users.delete") + async delete(options: { + id: string; + password: string; + }): Promise { + return this.client.makeRequest("DELETE", `users/${options.id}`, { + data: { + password: options.password, + }, + }); + } + + /** + * Verify a user's email address. + * @param email User's email address + * @param verificationCode Verification code sent to the user's email + */ + @feature("users.verifyEmail") + async verifyEmail(options: { + email: string; + verificationCode: string; + }): Promise { + return this.client.makeRequest("POST", "users/verify-email", { + data: options, + }); + } + + /** + * Log in a user. + * @param email User's email address + * @param password User's password + * @returns + */ + @feature("users.login") + async login(options: { email: string; password: string }): Promise { + const response = await this.client.makeRequest("POST", "users/login", { + data: { + username: options.email, + password: options.password, + }, + headers: { + "Content-Type": "application/x-www-form-urlencoded", + }, + }); + + if (response?.results) { + this.client.setTokens( + response.results.access_token.token, + response.results.refresh_token.token, + ); + } + + return response; + } + + /** + * Log in using an existing access token. + * @param accessToken Existing access token + * @returns + */ + @feature("users.loginWithToken") + async loginWithToken(options: { accessToken: string }): Promise { + this.client.setTokens(options.accessToken, null); + + try { + const response = await this.client.makeRequest("GET", "users/me"); + + return { + results: { + access_token: { + token: options.accessToken, + token_type: "access_token", + }, + }, + }; + } catch (error) { + this.client.setTokens(null, null); + throw new Error("Invalid token provided"); + } + } + + /** + * Log out the current user. + * @returns + */ + @feature("users.logout") + async logout(): Promise { + const response = await this.client.makeRequest("POST", "users/logout"); + this.client.setTokens(null, null); + return response; + } + + /** + * Refresh the access token using the refresh token. + * @returns + */ + @feature("users.refreshAccessToken") + async refreshAccessToken(): Promise { + const refreshToken = this.client.getRefreshToken(); + if (!refreshToken) { + throw new Error("No refresh token available. Please login again."); + } + + const response = await this.client.makeRequest( + "POST", + "users/refresh-token", + { + data: refreshToken, + headers: { + "Content-Type": "application/x-www-form-urlencoded", + }, + }, + ); + + if (response?.results) { + this.client.setTokens( + response.results.access_token.token, + response.results.refresh_token.token, + ); + } else { + throw new Error("Invalid response structure"); + } + + return response; + } + + /** + * Change the user's password. + * @param current_password User's current password + * @param new_password User's new password + * @returns + */ + @feature("users.changePassword") + async changePassword(options: { + current_password: string; + new_password: string; + }): Promise { + return this.client.makeRequest("POST", "users/change-password", { + data: options, + }); + } + + /** + * Request a password reset. 
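A sketch of the account and auth lifecycle exposed by the new `UsersClient` methods above: register, login (which stores the returned access and refresh tokens on the client), token refresh, and logout. The credentials are placeholders, and mounting as `client.users` is assumed; depending on server configuration, email verification may be required before login succeeds.

```typescript
import { r2rClient } from "r2r-js";

const client = new r2rClient("http://localhost:7272");

async function authLifecycle(): Promise<void> {
  // Placeholder credentials; a deployment may also require users.verifyEmail
  // between registration and login.
  await client.users.register({ email: "jane@example.com", password: "change-me-123" });

  // login() is form-encoded under the hood and stores the returned
  // access/refresh tokens on the client, so later calls are authenticated.
  await client.users.login({ email: "jane@example.com", password: "change-me-123" });

  // Rotate the access token using the stored refresh token.
  await client.users.refreshAccessToken();

  // Logging out clears both tokens on the client.
  await client.users.logout();
}

authLifecycle().catch(console.error);
```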
+ * @param email User's email address + * @returns + */ + @feature("users.requestPasswordReset") + async requestPasswordReset(options: { + email: string; + }): Promise { + return this.client.makeRequest("POST", "users/request-password-reset", { + data: options, + }); + } + + /** + * Reset a user's password using a reset token. + * @param reset_token Reset token sent to the user's email + * @param new_password New password for the user + * @returns + */ + @feature("users.resetPassword") + async resetPassword(options: { + reset_token: string; + new_password: string; + }): Promise { + return this.client.makeRequest("POST", "users/reset-password", { + data: options, + }); + } + + /** + * List users with pagination and filtering options. + * @param email Email to filter by (partial match) + * @param is_active Filter by active status + * @param is_superuser Filter by superuser status + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("users.list") + async list(options?: { + email?: string; + is_active?: boolean; + is_superuser?: boolean; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options?.offset ?? 0, + limit: options?.limit ?? 100, + }; + + if (options?.email) { + params.email = options.email; + } + if (options?.is_active) { + params.is_active = options.is_active; + } + if (options?.is_superuser) { + params.is_superuser = options.is_superuser; + } + + return this.client.makeRequest("GET", "users", { + params, + }); + } + + /** + * Get a specific user. + * @param id User ID to retrieve + * @returns + */ + @feature("users.retrieve") + async retrieve(options: { id: string }): Promise { + return this.client.makeRequest("GET", `users/${options.id}`); + } + + /** + * Get detailed information about the currently authenticated user. + * @returns + */ + @feature("users.me") + async me(): Promise { + return this.client.makeRequest("GET", `users/me`); + } + + /** + * Update a user. + * @param id User ID to update + * @param email Optional new email for the user + * @param is_superuser Optional new superuser status for the user + * @param name Optional new name for the user + * @param bio Optional new bio for the user + * @param profilePicture Optional new profile picture for the user + * @returns + */ + @feature("users.update") + async update(options: { + id: string; + email?: string; + is_superuser?: boolean; + name?: string; + bio?: string; + profilePicture?: string; + }): Promise { + const data = { + ...(options.email && { email: options.email }), + ...(options.is_superuser && { is_superuser: options.is_superuser }), + ...(options.name && { name: options.name }), + ...(options.bio && { bio: options.bio }), + ...(options.profilePicture && { + profile_picture: options.profilePicture, + }), + }; + + return this.client.makeRequest("POST", `users/${options.id}`, { + data, + }); + } + + /** + * Get all collections associated with a specific user. + * @param id User ID to retrieve collections for + * @param offset Specifies the number of objects to skip. Defaults to 0. + * @param limit Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + * @returns + */ + @feature("users.listCollections") + async listCollections(options: { + id: string; + offset?: number; + limit?: number; + }): Promise { + const params: Record = { + offset: options.offset ?? 
0, + limit: options.limit ?? 100, + }; + + return this.client.makeRequest("GET", `users/${options.id}/collections`, { + params, + }); + } + + /** + * Add a user to a collection. + * @param id User ID to add + * @param collectionId Collection ID to add the user to + * @returns + */ + @feature("users.addToCollection") + async addToCollection(options: { + id: string; + collectionId: string; + }): Promise { + return this.client.makeRequest( + "POST", + `users/${options.id}/collections/${options.collectionId}`, + ); + } + + /** + * Remove a user from a collection. + * @param id User ID to remove + * @param collectionId Collection ID to remove the user from + * @returns + */ + @feature("users.removeFromCollection") + async removeFromCollection(options: { + id: string; + collectionId: string; + }): Promise { + return this.client.makeRequest( + "DELETE", + `users/${options.id}/collections/${options.collectionId}`, + ); + } +} diff --git a/py/.flake8 b/py/.flake8 deleted file mode 100644 index 9cf14dc6d..000000000 --- a/py/.flake8 +++ /dev/null @@ -1,3 +0,0 @@ -[flake8] -exclude = playground/*,web/*,chat/*,docs/*,r2r/examples/,r2r/providers/database/vecs,r2r/providers/database/vecs/*,tests -ignore = E501, E722, W503, E203, F541, W293, W291, E266, F601, F403, F405 diff --git a/py/README.md b/py/README.md index 98672afb5..034e3d36f 100644 --- a/py/README.md +++ b/py/README.md @@ -9,33 +9,31 @@ r2r

-Build, scale, and deploy state of the art Retrieval-Augmented Generation applications.
+ Containerized, state-of-the-art Retrieval-Augmented Generation (RAG) with a RESTful API

 # About

-R2R (RAG to Riches), the Elasticsearch for RAG, bridges the gap between experimenting with and deploying state of the art Retrieval-Augmented Generation (RAG) applications. It's a complete platform that helps you quickly build and launch scalable RAG solutions. Built around a containerized [RESTful API](https://r2r-docs.sciphi.ai/api-reference/introduction), R2R offers multimodal ingestion support, hybrid search, GraphRAG capabilities, user management, and observability features.
+R2R (RAG to Riches) is the most advanced AI retrieval system, supporting Retrieval-Augmented Generation (RAG) with production-ready features. Built around a containerized [RESTful API](https://r2r-docs.sciphi.ai/api-reference/introduction), R2R offers multimodal content ingestion, hybrid search functionality, configurable GraphRAG, and comprehensive user and document management.

 For a more complete view of R2R, check out the [full documentation](https://r2r-docs.sciphi.ai/).

 ## Key Features
-- [**📁 Multimodal Ingestion**](https://r2r-docs.sciphi.ai/documentation/configuration/ingestion/overview): Parse `.txt`, `.pdf`, `.json`, `.png`, `.mp3`, and more.
+- [**📁 Multimodal Ingestion**](https://r2r-docs.sciphi.ai/documentation/configuration/ingestion): Parse `.txt`, `.pdf`, `.json`, `.png`, `.mp3`, and more.
 - [**🔍 Hybrid Search**](https://r2r-docs.sciphi.ai/cookbooks/hybrid-search): Combine semantic and keyword search with reciprocal rank fusion for enhanced relevancy.
-- [**🔗 Graph RAG**](https://r2r-docs.sciphi.ai/cookbooks/graphrag): Automatically extract relationships and build knowledge graphs.
-- [**🗂️ App Management**](https://r2r-docs.sciphi.ai/cookbooks/user-auth): Efficiently manage documents and users with full authentication.
+- [**🔗 Knowledge Graphs**](https://r2r-docs.sciphi.ai/cookbooks/knowledge-graphs): Automatically extract entities and relationships and build knowledge graphs.
+- [**📊 GraphRAG**](https://r2r-docs.sciphi.ai/cookbooks/graphrag): Cluster and summarize communities over your created graphs for even richer insights.
+- [**🗂️ User Management**](https://r2r-docs.sciphi.ai/cookbooks/user-auth): Efficiently manage documents and user roles within R2R.
 - [**🔭 Observability**](https://r2r-docs.sciphi.ai/cookbooks/observability): Observe and analyze your RAG engine performance.
-- [**🧩 Configurable**](https://r2r-docs.sciphi.ai/documentation/configuration/introduction): Provision your application using intuitive configuration files.
-- [**🖥️ Dashboard**](https://github.com/SciPhi-AI/R2R-Dashboard): An open-source React+Next.js app with optional authentication, to interact with R2R via GUI.
+- [**🧩 Configuration**](https://r2r-docs.sciphi.ai/documentation/configuration/overview): Set up your application using intuitive configuration files.
+- [**🖥️ Dashboard**](https://r2r-docs.sciphi.ai/cookbooks/application): An open-source React+Next.js admin dashboard to interact with R2R via GUI.

 ## [What's New](https://r2r-docs.sciphi.ai/introduction/whats-new)

-- Release 3.1.0    September 6, 2024
-  - Warning: These changes are breaking! We will be releasing a migration script soon.
-  - Orchestration with [Hatchet](https://github.com/hatchet-dev/hatchet)
-  - Default ingestion provider set to [Unstructured](https://docs.unstructured.io/welcome)
-  - Improved knowledge graph construction process
+- Release 3.3.0    December 3, 2024     Warning: These changes are breaking!
+ - [V3 API Specification](https://r2r-docs.sciphi.ai/api-reference/introduction) ## Install with pip The recommended way to get started with R2R is by using our CLI. @@ -63,34 +61,30 @@ r2r serve --docker --full The command above will install the `full` installation which includes Hatchet for orchestration and Unstructured.io for parsing. - ## Getting Started -- [Installation](https://r2r-docs.sciphi.ai/documentation/installation): Quick installation of R2R using Docker or pip -- [Quickstart](https://r2r-docs.sciphi.ai/documentation/quickstart): A quick introduction to R2R's core features - -## API & SDKs - -- [SDK](https://r2r-docs.sciphi.ai/documentation/python-sdk): API reference and Python/JS SDKs for interacting with R2R -- [API](https://r2r-docs.sciphi.ai/api-reference/introduction): API reference and Python/JS SDKs for interacting with R2R -- [Configuration](https://r2r-docs.sciphi.ai/documentation/configuration/introduction): A guide on how to configure your R2R system +- [Installation](/documentation/installation/overview): Quick installation of R2R using Docker or pip +- [Quickstart](/documentation/quickstart): A quick introduction to R2R's core features +- [Setup](/documentation/configuration/overview): Learn how to setup and configure R2R +- [API & SDKs](/api-and-sdks/introduction): API reference and Python/JS SDKs for interacting with R2R ## Cookbooks - Advanced RAG Pipelines - - [RAG Agent](https://r2r-docs.sciphi.ai/cookbooks/agent): R2R's powerful RAG agent - - [Hybrid Search](https://r2r-docs.sciphi.ai/cookbooks/hybrid-search): Introduction to hybrid search - - [Advanced RAG](https://r2r-docs.sciphi.ai/cookbooks/advanced-rag): Advanced RAG features + - [RAG Agent](/cookbooks/rag-agent): R2R's powerful RAG agent + - [Hybrid Search](/cookbooks/hybrid-search): Introduction to hybrid search + - [Advanced RAG](/cookbooks/advanced-rag): Advanced RAG features -- Knowledge Graphs - - [GraphRAG](https://r2r-docs.sciphi.ai/cookbooks/graphrag): Walkthrough of GraphRAG +- Orchestration + - [Orchestration](/cookbooks/orchestration): R2R event orchestration - Auth & Admin Features - - [Web Development](https://r2r-docs.sciphi.ai/cookbooks/web-dev): Building webapps using R2R - - [User Auth](https://r2r-docs.sciphi.ai/cookbooks/user-auth): Authenticating users - - [Collections](https://r2r-docs.sciphi.ai/cookbooks/collections): Document collections - - [Analytics & Observability](https://r2r-docs.sciphi.ai/cookbooks/observability): End-to-end logging and analytics - - [Web Application](https://r2r-docs.sciphi.ai/cookbooks/application): Connecting with the R2R Application + - [Web Development](/cookbooks/web-dev): Building webapps using R2R + - [User Auth](/cookbooks/user-auth): Authenticating users + - [Collections](/cookbooks/collections): Document collections + - [Analytics & Observability](/cookbooks/observability): End-to-end logging and analytics + - [Web Application](/cookbooks/application): Connecting with the R2R Application + ## Community diff --git a/py/cli/__init__.py b/py/cli/__init__.py index bc5458115..e69de29bb 100644 --- a/py/cli/__init__.py +++ b/py/cli/__init__.py @@ -1,18 +0,0 @@ -from .command_group import cli as command_group_cli -from .commands import auth, database, ingestion, management, retrieval, server -from .main import main - -__all__ = [ - # From cli.py - "main", - # From Command Collection - "command_group_cli", - # From Commands - "auth", - "ingestion", - "management", - "kg", - "database", - "retrieval", - "server", -] diff --git 
a/py/cli/command_group.py b/py/cli/command_group.py index 49c560f88..9a6593249 100644 --- a/py/cli/command_group.py +++ b/py/cli/command_group.py @@ -1,3 +1,5 @@ +from functools import wraps + import asyncclick as click from asyncclick import pass_context from asyncclick.exceptions import Exit @@ -5,6 +7,22 @@ from sdk import R2RAsyncClient +def deprecated_command(new_name): + def decorator(f): + @wraps(f) + async def wrapped(*args, **kwargs): + click.secho( + f"Warning: This command is deprecated. Please use '{new_name}' instead.", + fg="yellow", + err=True, + ) + return await f(*args, **kwargs) + + return wrapped + + return decorator + + @click.group() @click.option( "--base-url", default="http://localhost:7272", help="Base URL for the API" diff --git a/py/cli/commands/auth.py b/py/cli/commands/auth.py deleted file mode 100644 index 41cf4bfea..000000000 --- a/py/cli/commands/auth.py +++ /dev/null @@ -1,13 +0,0 @@ -import secrets - -import asyncclick as click - -from cli.command_group import cli - - -@cli.command() -def generate_private_key(): - """Generate a secure private key for R2R.""" - private_key = secrets.token_urlsafe(32) - click.echo(f"Generated Private Key: {private_key}") - click.echo("Keep this key secure and use it as your R2R_SECRET_KEY.") diff --git a/py/cli/commands/collections.py b/py/cli/commands/collections.py new file mode 100644 index 000000000..52503018d --- /dev/null +++ b/py/cli/commands/collections.py @@ -0,0 +1,141 @@ +import json + +import asyncclick as click +from asyncclick import pass_context + +from cli.utils.timer import timer +from r2r import R2RAsyncClient + + +@click.group() +def collections(): + """Collections commands.""" + pass + + +@collections.command() +@click.argument("name", required=True, type=str) +@click.option("--description", type=str) +@pass_context +async def create(ctx, name, description): + """Create a collection.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.collections.create( + name=name, + description=description, + ) + + click.echo(json.dumps(response, indent=2)) + + +@collections.command() +@click.option("--ids", multiple=True, help="Collection IDs to fetch") +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. Defaults to 100.", +) +@pass_context +async def list(ctx, ids, offset, limit): + """Get an overview of collections.""" + client: R2RAsyncClient = ctx.obj + ids = list(ids) if ids else None + + with timer(): + response = await client.collections.list( + ids=ids, + offset=offset, + limit=limit, + ) + + for user in response["results"]: + click.echo(json.dumps(user, indent=2)) + + +@collections.command() +@click.argument("id", required=True, type=str) +@pass_context +async def retrieve(ctx, id): + """Retrieve a collection by ID.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.collections.retrieve(id=id) + + click.echo(json.dumps(response, indent=2)) + + +@collections.command() +@click.argument("id", required=True, type=str) +@pass_context +async def delete(ctx, id): + """Delete a collection by ID.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.collections.delete(id=id) + + click.echo(json.dumps(response, indent=2)) + + +@collections.command() +@click.argument("id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. 
Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. Defaults to 100.", +) +@pass_context +async def list_documents(ctx, id, offset, limit): + """Get an overview of collections.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.collections.list_documents( + id=id, + offset=offset, + limit=limit, + ) + + for user in response["results"]: + click.echo(json.dumps(user, indent=2)) + + +@collections.command() +@click.argument("id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. Defaults to 100.", +) +@pass_context +async def list_users(ctx, id, offset, limit): + """Get an overview of collections.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.collections.list_users( + id=id, + offset=offset, + limit=limit, + ) + + for user in response["results"]: + click.echo(json.dumps(user, indent=2)) diff --git a/py/cli/commands/conversations.py b/py/cli/commands/conversations.py new file mode 100644 index 000000000..bad474d58 --- /dev/null +++ b/py/cli/commands/conversations.py @@ -0,0 +1,124 @@ +import json + +import asyncclick as click +from asyncclick import pass_context + +from cli.utils.timer import timer +from r2r import R2RAsyncClient + + +@click.group() +def conversations(): + """Conversations commands.""" + pass + + +@conversations.command() +@pass_context +async def create(ctx): + """Create a conversation.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.conversations.create() + + click.echo(json.dumps(response, indent=2)) + + +@conversations.command() +@click.option("--ids", multiple=True, help="Conversation IDs to fetch") +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. 
Defaults to 100.", +) +@pass_context +async def list(ctx, ids, offset, limit): + """Get an overview of conversations.""" + client: R2RAsyncClient = ctx.obj + ids = list(ids) if ids else None + + with timer(): + response = await client.conversations.list( + ids=ids, + offset=offset, + limit=limit, + ) + + for user in response["results"]: + click.echo(json.dumps(user, indent=2)) + + +@conversations.command() +@click.argument("id", required=True, type=str) +@pass_context +async def retrieve(ctx, id): + """Retrieve a collection by ID.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.conversations.retrieve(id=id) + + click.echo(json.dumps(response, indent=2)) + + +@conversations.command() +@click.argument("id", required=True, type=str) +@pass_context +async def delete(ctx, id): + """Delete a collection by ID.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.conversations.delete(id=id) + + click.echo(json.dumps(response, indent=2)) + + +@conversations.command() +@click.argument("id", required=True, type=str) +@pass_context +async def list_branches(ctx, id): + """List all branches in a conversation.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.conversations.list_branches( + id=id, + ) + + for user in response["results"]: + click.echo(json.dumps(user, indent=2)) + + +@conversations.command() +@click.argument("id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. Defaults to 100.", +) +@pass_context +async def list_users(ctx, id, offset, limit): + """Get an overview of collections.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.collections.list_users( + id=id, + offset=offset, + limit=limit, + ) + + for user in response["results"]: + click.echo(json.dumps(user, indent=2)) diff --git a/py/cli/commands/database.py b/py/cli/commands/database.py index 311b28809..85adf46fe 100644 --- a/py/cli/commands/database.py +++ b/py/cli/commands/database.py @@ -2,8 +2,6 @@ import asyncclick as click -from cli.command_group import cli - from ..utils.database_utils import ( check_database_connection, get_database_url_from_env, diff --git a/py/cli/commands/documents.py b/py/cli/commands/documents.py new file mode 100644 index 000000000..6c1e6ea7f --- /dev/null +++ b/py/cli/commands/documents.py @@ -0,0 +1,393 @@ +import json +import os +import tempfile +import uuid +from urllib.parse import urlparse + +import asyncclick as click +import requests +from asyncclick import pass_context + +from cli.utils.param_types import JSON +from cli.utils.timer import timer +from r2r import R2RAsyncClient + + +@click.group() +def documents(): + """Documents commands.""" + pass + + +@documents.command() +@click.argument( + "file_paths", nargs=-1, required=True, type=click.Path(exists=True) +) +@click.option("--ids", multiple=True, help="Document IDs for ingestion") +@click.option( + "--metadatas", type=JSON, help="Metadatas for ingestion as a JSON string" +) +@click.option( + "--run-without-orchestration", is_flag=True, help="Run with orchestration" +) +@pass_context +async def create(ctx, file_paths, ids, metadatas, run_without_orchestration): + """Ingest files into R2R.""" + client: R2RAsyncClient = ctx.obj + run_with_orchestration = not run_without_orchestration + responses = [] + + for idx, file_path in enumerate(file_paths): + with 
timer(): + current_id = [ids[idx]] if ids and idx < len(ids) else None + current_metadata = ( + metadatas[idx] if metadatas and idx < len(metadatas) else None + ) + + click.echo( + f"Processing file {idx + 1}/{len(file_paths)}: {file_path}" + ) + response = await client.documents.create( + file_path=file_path, + metadata=current_metadata, + id=current_id, + run_with_orchestration=run_with_orchestration, + ) + responses.append(response) + click.echo(json.dumps(response, indent=2)) + click.echo("-" * 40) + + click.echo(f"\nProcessed {len(responses)} files successfully.") + + +@documents.command() +@click.argument("file_path", required=True, type=click.Path(exists=True)) +@click.option("--id", required=True, help="Existing document ID to update") +@click.option( + "--metadata", type=JSON, help="Metadatas for ingestion as a JSON string" +) +@click.option( + "--run-without-orchestration", is_flag=True, help="Run with orchestration" +) +@pass_context +async def update(ctx, file_path, id, metadata, run_without_orchestration): + """Update an existing file in R2R.""" + client: R2RAsyncClient = ctx.obj + run_with_orchestration = not run_without_orchestration + responses = [] + + with timer(): + click.echo(f"Updating file {id}: {file_path}") + response = await client.documents.update( + file_path=file_path, + metadata=metadata, + id=id, + run_with_orchestration=run_with_orchestration, + ) + responses.append(response) + click.echo(json.dumps(response, indent=2)) + click.echo("-" * 40) + + click.echo(f"Updated file {id} file successfully.") + + +@documents.command() +@click.argument("id", required=True, type=str) +@pass_context +async def retrieve(ctx, id): + """Retrieve a document by ID.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.documents.retrieve(id=id) + + click.echo(json.dumps(response, indent=2)) + + +@documents.command() +@click.argument("id", required=True, type=str) +@pass_context +async def delete(ctx, id): + """Delete a document by ID.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.documents.delete(id=id) + + click.echo(json.dumps(response, indent=2)) + + +@documents.command() +@click.argument("id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. Defaults to 100.", +) +@pass_context +async def list_chunks(ctx, id, offset, limit): + """List collections for a specific document.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.documents.list_chunks( + id=id, + offset=offset, + limit=limit, + ) + + click.echo(json.dumps(response, indent=2)) + + +@documents.command() +@click.argument("id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. 
Defaults to 100.", +) +@pass_context +async def list_collections(ctx, id, offset, limit): + """List collections for a specific document.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.documents.list_collections( + id=id, + offset=offset, + limit=limit, + ) + + click.echo(json.dumps(response, indent=2)) + + +# TODO +async def ingest_files_from_urls(client, urls): + """Download and ingest files from given URLs.""" + files_to_ingest = [] + metadatas = [] + document_ids = [] + temp_files = [] + + try: + for url in urls: + filename = os.path.basename(urlparse(url).path) + is_pdf = filename.lower().endswith(".pdf") + + temp_file = tempfile.NamedTemporaryFile( + mode="wb" if is_pdf else "w+", + delete=False, + suffix=f"_{filename}", + ) + temp_files.append(temp_file) + + response = requests.get(url) + response.raise_for_status() + if is_pdf: + temp_file.write(response.content) + else: + temp_file.write(response.text) + temp_file.close() + + files_to_ingest.append(temp_file.name) + metadatas.append({"title": filename}) + # TODO: use the utils function generate_document_id + document_ids.append(str(uuid.uuid5(uuid.NAMESPACE_DNS, url))) + + for it, file in enumerate(files_to_ingest): + click.echo(f"Ingesting file: {file}") + response = await client.documents.create( + file, metadata=metadatas[it], id=document_ids[it] + ) + + return response["results"] + finally: + # Clean up temporary files + for temp_file in temp_files: + os.unlink(temp_file.name) + + +# Missing CLI Commands +@documents.command() +@click.argument("id", required=True, type=str) +@click.option("--run-type", help="Extraction run type (estimate or run)") +@click.option("--settings", type=JSON, help="Extraction settings as JSON") +@click.option( + "--run-without-orchestration", + is_flag=True, + help="Run without orchestration", +) +@pass_context +async def extract(ctx, id, run_type, settings, run_without_orchestration): + """Extract entities and relationships from a document.""" + client: R2RAsyncClient = ctx.obj + run_with_orchestration = not run_without_orchestration + + with timer(): + response = await client.documents.extract( + id=id, + run_type=run_type, + settings=settings, + run_with_orchestration=run_with_orchestration, + ) + + click.echo(json.dumps(response, indent=2)) + + +@documents.command() +@click.argument("id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of items to return. Defaults to 100.", +) +@click.option( + "--include-embeddings", + is_flag=True, + help="Include embeddings in response", +) +@pass_context +async def list_entities(ctx, id, offset, limit, include_embeddings): + """List entities extracted from a document.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.documents.list_entities( + id=id, + offset=offset, + limit=limit, + include_embeddings=include_embeddings, + ) + + click.echo(json.dumps(response, indent=2)) + + +@documents.command() +@click.argument("id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of items to return. 
Defaults to 100.", +) +@click.option( + "--entity-names", + multiple=True, + help="Filter by entity names", +) +@click.option( + "--relationship-types", + multiple=True, + help="Filter by relationship types", +) +@pass_context +async def list_relationships( + ctx, id, offset, limit, entity_names, relationship_types +): + """List relationships extracted from a document.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.documents.list_relationships( + id=id, + offset=offset, + limit=limit, + entity_names=list(entity_names) if entity_names else None, + relationship_types=( + list(relationship_types) if relationship_types else None + ), + ) + + click.echo(json.dumps(response, indent=2)) + + +@documents.command() +@click.option( + "--v2", is_flag=True, help="use aristotle_v2.txt (a smaller file)" +) +@click.option( + "--v3", is_flag=True, help="use aristotle_v3.txt (a larger file)" +) +@pass_context +async def create_sample(ctx, v2=True, v3=False): + """Ingest the first sample file into R2R.""" + sample_file_url = f"https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/aristotle.txt" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await ingest_files_from_urls(client, [sample_file_url]) + click.echo( + f"Sample file ingestion completed. Ingest files response:\n\n{response}" + ) + + +@documents.command() +@pass_context +async def create_samples(ctx): + """Ingest multiple sample files into R2R.""" + client: R2RAsyncClient = ctx.obj + urls = [ + "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_3.html", + "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_4.html", + "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_5.html", + "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/lyft_2021.pdf", + "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/uber_2021.pdf", + "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/got.txt", + "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_1.html", + "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_2.html", + "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/aristotle.txt", + ] + with timer(): + response = await ingest_files_from_urls(client, urls) + + click.echo( + f"Sample files ingestion completed. Ingest files response:\n\n{response}" + ) + + +@documents.command() +@click.option("--ids", multiple=True, help="Document IDs to fetch") +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. 
Defaults to 100.", +) +@pass_context +async def list(ctx, ids, offset, limit): + """Get an overview of documents.""" + client: R2RAsyncClient = ctx.obj + ids = list(ids) if ids else None + + with timer(): + response = await client.documents.list( + ids=ids, + offset=offset, + limit=limit, + ) + + for document in response["results"]: + click.echo(document) diff --git a/py/cli/commands/graphs.py b/py/cli/commands/graphs.py new file mode 100644 index 000000000..a7a6b6325 --- /dev/null +++ b/py/cli/commands/graphs.py @@ -0,0 +1,382 @@ +import json + +import asyncclick as click +from asyncclick import pass_context + +from cli.utils.param_types import JSON +from cli.utils.timer import timer +from r2r import R2RAsyncClient + + +@click.group() +def graphs(): + """Graphs commands.""" + pass + + +@graphs.command() +@click.option( + "--collection-ids", multiple=True, help="Collection IDs to fetch" +) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of graphs to return. Defaults to 100.", +) +@pass_context +async def list(ctx, collection_ids, offset, limit): + """List available graphs.""" + client: R2RAsyncClient = ctx.obj + collection_ids = list(collection_ids) if collection_ids else None + + with timer(): + response = await client.graphs.list( + collection_ids=collection_ids, + offset=offset, + limit=limit, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@pass_context +async def retrieve(ctx, collection_id): + """Retrieve a specific graph by collection ID.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.retrieve(collection_id=collection_id) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@pass_context +async def reset(ctx, collection_id): + """Reset a graph, removing all its data.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.reset(collection_id=collection_id) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.option("--name", help="New name for the graph") +@click.option("--description", help="New description for the graph") +@pass_context +async def update(ctx, collection_id, name, description): + """Update graph information.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.update( + collection_id=collection_id, + name=name, + description=description, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of entities to return. 
Defaults to 100.", +) +@pass_context +async def list_entities(ctx, collection_id, offset, limit): + """List entities in a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.list_entities( + collection_id=collection_id, + offset=offset, + limit=limit, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.argument("entity_id", required=True, type=str) +@pass_context +async def get_entity(ctx, collection_id, entity_id): + """Get entity information from a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.get_entity( + collection_id=collection_id, + entity_id=entity_id, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.argument("entity_id", required=True, type=str) +@pass_context +async def remove_entity(ctx, collection_id, entity_id): + """Remove an entity from a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.remove_entity( + collection_id=collection_id, + entity_id=entity_id, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of relationships to return. Defaults to 100.", +) +@pass_context +async def list_relationships(ctx, collection_id, offset, limit): + """List relationships in a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.list_relationships( + collection_id=collection_id, + offset=offset, + limit=limit, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.argument("relationship_id", required=True, type=str) +@pass_context +async def get_relationship(ctx, collection_id, relationship_id): + """Get relationship information from a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.get_relationship( + collection_id=collection_id, + relationship_id=relationship_id, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.argument("relationship_id", required=True, type=str) +@pass_context +async def remove_relationship(ctx, collection_id, relationship_id): + """Remove a relationship from a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.remove_relationship( + collection_id=collection_id, + relationship_id=relationship_id, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.option( + "--settings", required=True, type=JSON, help="Build settings as JSON" +) +@click.option("--run-type", default="estimate", help="Type of build to run") +@click.option( + "--run-without-orchestration", + is_flag=True, + help="Run without orchestration", +) +@pass_context +async def build( + ctx, collection_id, settings, run_type, run_without_orchestration +): + """Build a graph with specified settings.""" + client: R2RAsyncClient = ctx.obj + run_with_orchestration = not run_without_orchestration + + with timer(): + response = await client.graphs.build( + 
collection_id=collection_id, + settings=settings, + run_type=run_type, + run_with_orchestration=run_with_orchestration, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of communities to return. Defaults to 100.", +) +@pass_context +async def list_communities(ctx, collection_id, offset, limit): + """List communities in a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.list_communities( + collection_id=collection_id, + offset=offset, + limit=limit, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.argument("community_id", required=True, type=str) +@pass_context +async def get_community(ctx, collection_id, community_id): + """Get community information from a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.get_community( + collection_id=collection_id, + community_id=community_id, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.argument("community_id", required=True, type=str) +@click.option("--name", help="New name for the community") +@click.option("--summary", help="New summary for the community") +@click.option( + "--findings", + type=JSON, + help="New findings for the community as JSON array", +) +@click.option("--rating", type=int, help="New rating for the community") +@click.option( + "--rating-explanation", help="New rating explanation for the community" +) +@click.option("--level", type=int, help="New level for the community") +@click.option( + "--attributes", type=JSON, help="New attributes for the community as JSON" +) +@pass_context +async def update_community( + ctx, + collection_id, + community_id, + name, + summary, + findings, + rating, + rating_explanation, + level, + attributes, +): + """Update community information.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.update_community( + collection_id=collection_id, + community_id=community_id, + name=name, + summary=summary, + findings=findings, + rating=rating, + rating_explanation=rating_explanation, + level=level, + attributes=attributes, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.argument("community_id", required=True, type=str) +@pass_context +async def delete_community(ctx, collection_id, community_id): + """Delete a community from a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.delete_community( + collection_id=collection_id, + community_id=community_id, + ) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@pass_context +async def pull(ctx, collection_id): + """Pull documents into a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.pull(collection_id=collection_id) + + click.echo(json.dumps(response, indent=2)) + + +@graphs.command() +@click.argument("collection_id", required=True, type=str) +@click.argument("document_id", required=True, type=str) +@pass_context +async def 
remove_document(ctx, collection_id, document_id): + """Remove a document from a graph.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.graphs.remove_document( + collection_id=collection_id, + document_id=document_id, + ) + + click.echo(json.dumps(response, indent=2)) diff --git a/py/cli/commands/indices.py b/py/cli/commands/indices.py new file mode 100644 index 000000000..942697646 --- /dev/null +++ b/py/cli/commands/indices.py @@ -0,0 +1,89 @@ +import json + +import asyncclick as click +from asyncclick import pass_context + +from cli.utils.timer import timer +from r2r import R2RAsyncClient + + +@click.group() +def indices(): + """Indices commands.""" + pass + + +@indices.command() +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. Defaults to 100.", +) +@pass_context +async def list(ctx, offset, limit): + """Get an overview of indices.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.indices.list( + offset=offset, + limit=limit, + ) + + for user in response["results"]: + click.echo(json.dumps(user, indent=2)) + + +@indices.command() +@click.argument("index_name", required=True, type=str) +@click.argument("table_name", required=True, type=str) +@pass_context +async def retrieve(ctx, index_name, table_name): + """Retrieve an index by name.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.indices.retrieve( + index_name=index_name, + table_name=table_name, + ) + + click.echo(json.dumps(response, indent=2)) + + +@indices.command() +@click.argument("index_name", required=True, type=str) +@click.argument("table_name", required=True, type=str) +@pass_context +async def delete(ctx, index_name, table_name): + """Delete an index by name.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.indices.retrieve( + index_name=index_name, + table_name=table_name, + ) + + click.echo(json.dumps(response, indent=2)) + + +@indices.command() +@click.argument("id", required=True, type=str) +@pass_context +async def list_branches(ctx, id): + """List all branches in a conversation.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.indices.list_branches( + id=id, + ) + + for user in response["results"]: + click.echo(json.dumps(user, indent=2)) diff --git a/py/cli/commands/ingestion.py b/py/cli/commands/ingestion.py deleted file mode 100644 index 0d4d3a091..000000000 --- a/py/cli/commands/ingestion.py +++ /dev/null @@ -1,306 +0,0 @@ -import json -import os -import tempfile -import uuid -from urllib.parse import urlparse - -import asyncclick as click -import requests -from asyncclick import pass_context - -from cli.command_group import cli -from cli.utils.param_types import JSON -from cli.utils.timer import timer -from shared.abstractions import IndexMeasure, IndexMethod, VectorTableName - - -async def ingest_files_from_urls(client, urls): - """Download and ingest files from given URLs.""" - files_to_ingest = [] - metadatas = [] - document_ids = [] - temp_files = [] - - try: - for url in urls: - filename = os.path.basename(urlparse(url).path) - is_pdf = filename.lower().endswith(".pdf") - - temp_file = tempfile.NamedTemporaryFile( - mode="wb" if is_pdf else "w+", - delete=False, - suffix=f"_{filename}", - ) - temp_files.append(temp_file) - - response = requests.get(url) - response.raise_for_status() - if is_pdf: - 
temp_file.write(response.content) - else: - temp_file.write(response.text) - temp_file.close() - - files_to_ingest.append(temp_file.name) - metadatas.append({"title": filename}) - # TODO: use the utils function generate_document_id - document_ids.append(uuid.uuid5(uuid.NAMESPACE_DNS, url)) - - response = await client.ingest_files( - files_to_ingest, metadatas=metadatas, document_ids=document_ids - ) - - return response["results"] - finally: - # Clean up temporary files - for temp_file in temp_files: - os.unlink(temp_file.name) - - -@cli.command() -@click.argument( - "file_paths", nargs=-1, required=True, type=click.Path(exists=True) -) -@click.option( - "--document-ids", multiple=True, help="Document IDs for ingestion" -) -@click.option( - "--metadatas", type=JSON, help="Metadatas for ingestion as a JSON string" -) -@click.option( - "--run-without-orchestration", is_flag=True, help="Run with orchestration" -) -@pass_context -async def ingest_files( - ctx, file_paths, document_ids, metadatas, run_without_orchestration -): - """Ingest files into R2R.""" - client = ctx.obj - with timer(): - file_paths = list(file_paths) - document_ids = list(document_ids) if document_ids else None - run_with_orchestration = not run_without_orchestration - response = await client.ingest_files( - file_paths, - metadatas, - document_ids, - run_with_orchestration=run_with_orchestration, - ) - click.echo(json.dumps(response, indent=2)) - - -@cli.command() -@click.argument( - "file-paths", nargs=-1, required=True, type=click.Path(exists=True) -) -@click.option( - "--document-ids", - required=True, - help="Document IDs to update (comma-separated)", -) -@click.option( - "--metadatas", type=JSON, help="Metadatas for updating as a JSON string" -) -@click.option( - "--run-without-orchestration", is_flag=True, help="Run with orchestration" -) -@pass_context -async def update_files( - ctx, file_paths, document_ids, metadatas, run_without_orchestration -): - """Update existing files in R2R.""" - client = ctx.obj - with timer(): - file_paths = list(file_paths) - - document_ids = document_ids.split(",") - - if metadatas: - if isinstance(metadatas, str): - metadatas = json.loads(metadatas) - if isinstance(metadatas, dict): - metadatas = [metadatas] - elif not isinstance(metadatas, list): - raise click.BadParameter( - "Metadatas must be a JSON string representing a list of dictionaries or a single dictionary" - ) - run_with_orchestration = not run_without_orchestration - response = await client.update_files( - file_paths, - document_ids, - metadatas, - run_with_orchestration=run_with_orchestration, - ) - click.echo(json.dumps(response, indent=2)) - - -@cli.command() -@click.option( - "--v2", is_flag=True, help="use aristotle_v2.txt (a smaller file)" -) -@click.option( - "--v3", is_flag=True, help="use aristotle_v3.txt (a larger file)" -) -@pass_context -async def ingest_sample_file(ctx, v2=False, v3=False): - """Ingest the first sample file into R2R.""" - sample_file_url = f"https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/aristotle{'_v2' if v2 else ''}{'_v3' if v3 else ''}.txt" - client = ctx.obj - - with timer(): - response = await ingest_files_from_urls(client, [sample_file_url]) - click.echo( - f"Sample file ingestion completed. 
Ingest files response:\n\n{response}" - ) - - -@cli.command() -@pass_context -async def ingest_sample_files(ctx): - """Ingest multiple sample files into R2R.""" - client = ctx.obj - urls = [ - "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_3.html", - "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_4.html", - "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_5.html", - "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/lyft_2021.pdf", - "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/uber_2021.pdf", - "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/aristotle.txt", - "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/got.txt", - "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_1.html", - "https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/pg_essay_2.html", - ] - with timer(): - response = await ingest_files_from_urls(client, urls) - - click.echo( - f"Sample files ingestion completed. Ingest files response:\n\n{response}" - ) - - -@cli.command() -@pass_context -async def ingest_sample_files_from_unstructured(ctx): - """Ingest multiple sample files from URLs into R2R.""" - client = ctx.obj - - # Get the absolute path of the current script - current_script_path = os.path.abspath(__file__) - - # Navigate to the root directory of the project - root_dir = os.path.dirname( - os.path.dirname(os.path.dirname(current_script_path)) - ) - - # Construct the absolute path to the data_unstructured folder - folder = os.path.join(root_dir, "core", "examples", "data_unstructured") - - file_paths = [os.path.join(folder, file) for file in os.listdir(folder)] - - with timer(): - response = await client.ingest_files(file_paths) - - click.echo( - f"Sample files ingestion completed. 
Ingest files response:\n\n{response}" - ) - - -@cli.command() -@click.option( - "--table-name", - type=click.Choice([t.value for t in VectorTableName]), - default=VectorTableName.VECTORS.value, - help="Table to create index on", -) -@click.option( - "--index-method", - type=click.Choice([m.value for m in IndexMethod]), - default=IndexMethod.hnsw.value, - help="Indexing method to use", -) -@click.option( - "--index-measure", - type=click.Choice([m.value for m in IndexMeasure]), - default=IndexMeasure.cosine_distance.value, - help="Distance measure to use", -) -@click.option( - "--index-arguments", - type=JSON, - help="Additional index arguments as JSON", -) -@click.option( - "--index-name", - help="Custom name for the index", -) -@click.option( - "--no-concurrent", - is_flag=True, - help="Disable concurrent index creation", -) -@pass_context -async def create_vector_index( - ctx, - table_name, - index_method, - index_measure, - index_arguments, - index_name, - index_column, - no_concurrent, -): - """Create a vector index for similarity search.""" - client = ctx.obj - with timer(): - response = await client.create_vector_index( - table_name=table_name, - index_method=index_method, - index_measure=index_measure, - index_arguments=index_arguments, - index_name=index_name, - index_column=index_column, - concurrently=not no_concurrent, - ) - click.echo(json.dumps(response, indent=2)) - - -@cli.command() -@click.option( - "--table-name", - type=click.Choice([t.value for t in VectorTableName]), - default=VectorTableName.VECTORS.value, - help="Table to list indices from", -) -@pass_context -async def list_vector_indices(ctx, table_name): - """List all vector indices for a table.""" - client = ctx.obj - with timer(): - response = await client.list_vector_indices(table_name=table_name) - click.echo(json.dumps(response, indent=2)) - - -@cli.command() -@click.argument("index-name", required=True) -@click.option( - "--table-name", - type=click.Choice([t.value for t in VectorTableName]), - default=VectorTableName.VECTORS.value, - help="Table containing the index", -) -@click.option( - "--no-concurrent", - is_flag=True, - help="Disable concurrent index deletion", -) -@pass_context -async def delete_vector_index(ctx, index_name, table_name, no_concurrent): - """Delete a vector index.""" - client = ctx.obj - with timer(): - response = await client.delete_vector_index( - index_name=index_name, - table_name=table_name, - concurrently=not no_concurrent, - ) - click.echo(json.dumps(response, indent=2)) diff --git a/py/cli/commands/kg.py b/py/cli/commands/kg.py deleted file mode 100644 index e0a1a2735..000000000 --- a/py/cli/commands/kg.py +++ /dev/null @@ -1,296 +0,0 @@ -import json - -import asyncclick as click -from asyncclick import pass_context - -from cli.command_group import cli -from cli.utils.timer import timer - - -@cli.command() -@click.option( - "--collection-id", - required=False, - default="", - help="Collection ID to create graph for.", -) -@click.option( - "--run", - is_flag=True, - help="Run the graph creation process.", -) -@click.option( - "--kg-creation-settings", - required=False, - help="Settings for the graph creation process.", -) -@click.option( - "--force-kg-creation", - is_flag=True, - help="Force the graph creation process.", -) -@pass_context -async def create_graph( - ctx, collection_id, run, kg_creation_settings, force_kg_creation -): - client = ctx.obj - - if kg_creation_settings: - try: - kg_creation_settings = json.loads(kg_creation_settings) - except json.JSONDecodeError: - 
click.echo( - "Error: kg-creation-settings must be a valid JSON string" - ) - return - else: - kg_creation_settings = {} - - run_type = "run" if run else "estimate" - - if force_kg_creation: - kg_creation_settings = {"force_kg_creation": True} - - with timer(): - response = await client.create_graph( - collection_id=collection_id, - run_type=run_type, - kg_creation_settings=kg_creation_settings, - ) - - click.echo(json.dumps(response, indent=2)) - - -@cli.command() -@click.option( - "--collection-id", - required=False, - help="Collection ID to deduplicate entities for.", -) -@click.option( - "--run", - is_flag=True, - help="Run the deduplication process.", -) -@click.option( - "--force-deduplication", - is_flag=True, - help="Force the deduplication process.", -) -@click.option( - "--deduplication-settings", - required=False, - help="Settings for the deduplication process.", -) -@pass_context -async def deduplicate_entities( - ctx, collection_id, run, force_deduplication, deduplication_settings -): - """ - Deduplicate entities in the knowledge graph. - """ - client = ctx.obj - - if deduplication_settings: - try: - deduplication_settings = json.loads(deduplication_settings) - except json.JSONDecodeError: - click.echo( - "Error: deduplication-settings must be a valid JSON string" - ) - return - else: - deduplication_settings = {} - - run_type = "run" if run else "estimate" - - if force_deduplication: - deduplication_settings = {"force_deduplication": True} - - with timer(): - response = await client.deduplicate_entities( - collection_id, run_type, deduplication_settings - ) - - click.echo(json.dumps(response, indent=2)) - - -@cli.command() -@click.option( - "--collection-id", - required=False, - default="", - help="Collection ID to enrich graph for.", -) -@click.option( - "--run", - is_flag=True, - help="Run the graph enrichment process.", -) -@click.option( - "--force-kg-enrichment", - is_flag=True, - help="Force the graph enrichment process.", -) -@click.option( - "--kg-enrichment-settings", - required=False, - help="Settings for the graph enrichment process.", -) -@pass_context -async def enrich_graph( - ctx, collection_id, run, force_kg_enrichment, kg_enrichment_settings -): - """ - Enrich an existing graph. - """ - client = ctx.obj - - if kg_enrichment_settings: - try: - kg_enrichment_settings = json.loads(kg_enrichment_settings) - except json.JSONDecodeError: - click.echo( - "Error: kg-enrichment-settings must be a valid JSON string" - ) - return - else: - kg_enrichment_settings = {} - - run_type = "run" if run else "estimate" - - if force_kg_enrichment: - kg_enrichment_settings = {"force_kg_enrichment": True} - - with timer(): - response = await client.enrich_graph( - collection_id, run_type, kg_enrichment_settings - ) - - click.echo(json.dumps(response, indent=2)) - - -@cli.command() -@click.option( - "--collection-id", - required=True, - help="Collection ID to retrieve entities from.", -) -@click.option( - "--offset", - type=int, - default=0, - help="Offset for pagination.", -) -@click.option( - "--limit", - type=int, - default=100, - help="Limit for pagination.", -) -@click.option( - "--entity-ids", - multiple=True, - help="Entity IDs to filter by.", -) -@click.option( - "--entity-level", - default="collection", - help="Entity level to filter by.", -) -@pass_context -async def get_entities( - ctx, collection_id, offset, limit, entity_ids, entity_level -): - """ - Retrieve entities from the knowledge graph. 
- """ - client = ctx.obj - - with timer(): - response = await client.get_entities( - entity_level, - collection_id, - offset, - limit, - list(entity_ids), - ) - - click.echo(json.dumps(response, indent=2)) - - -@cli.command() -@click.option( - "--collection-id", - required=True, - help="Collection ID to retrieve triples from.", -) -@click.option( - "--offset", - type=int, - default=0, - help="Offset for pagination.", -) -@click.option( - "--limit", - type=int, - default=100, - help="Limit for pagination.", -) -@click.option( - "--triple-ids", - multiple=True, - help="Triple IDs to filter by.", -) -@click.option( - "--entity-names", - multiple=True, - help="Entity names to filter by.", -) -@pass_context -async def get_triples( - ctx, collection_id, offset, limit, triple_ids, entity_names -): - """ - Retrieve triples from the knowledge graph. - """ - client = ctx.obj - - with timer(): - response = await client.get_triples( - collection_id, - offset, - limit, - list(entity_names), - list(triple_ids), - ) - - click.echo(json.dumps(response, indent=2)) - - -@cli.command() -@click.option( - "--collection-id", - required=True, - help="Collection ID to delete the graph for.", -) -@click.option( - "--cascade", - is_flag=True, - help="Whether to cascade the deletion.", -) -@pass_context -async def delete_graph_for_collection(ctx, collection_id, cascade): - """ - Delete the graph for a given collection. - - NOTE: Setting the cascade flag to true will delete entities and triples for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection. - """ - client = ctx.obj - - with timer(): - response = await client.delete_graph_for_collection( - collection_id, cascade - ) - - click.echo(json.dumps(response, indent=2)) diff --git a/py/cli/commands/management.py b/py/cli/commands/management.py deleted file mode 100644 index 934ebbc3c..000000000 --- a/py/cli/commands/management.py +++ /dev/null @@ -1,167 +0,0 @@ -from typing import Any, Dict - -import asyncclick as click -from asyncclick import pass_context - -from cli.command_group import cli -from cli.utils.param_types import JSON -from cli.utils.timer import timer - - -@cli.command() -@click.option("--filters", type=JSON, help="Filters for analytics as JSON") -@click.option("--analysis-types", type=JSON, help="Analysis types as JSON") -@pass_context -async def analytics( - ctx, filters: Dict[str, Any], analysis_types: Dict[str, Any] -): - client = ctx.obj - """Retrieve analytics data.""" - with timer(): - response = await client.analytics(filters, analysis_types) - - click.echo(response) - - -@cli.command() -@pass_context -async def app_settings(ctx): - """Retrieve application settings.""" - client = ctx.obj - with timer(): - response = await client.app_settings() - - click.echo(response) - - -@cli.command() -@click.option("--user-ids", multiple=True, help="User IDs to overview") -@click.option( - "--offset", - default=None, - help="The offset to start from. Defaults to 0.", -) -@click.option( - "--limit", - default=None, - help="The maximum number of nodes to return. 
Defaults to 100.", -) -@pass_context -async def users_overview(ctx, user_ids, offset, limit): - """Get an overview of users.""" - client = ctx.obj - user_ids = list(user_ids) if user_ids else None - - with timer(): - response = await client.users_overview(user_ids, offset, limit) - - if "results" in response: - click.echo("\nUser Overview:") - click.echo( - f"{'User ID':<40} {'Num Files':<10} {'Total Size (bytes)':<20} Document IDs" - ) - for user in response["results"]: - click.echo( - f"{user['user_id']:<40} {user['num_files']:<10} {user['total_size_in_bytes']:<20} {', '.join(user['document_ids'][:3]) + ('...' if len(user['document_ids']) > 3 else '')}" - ) - else: - click.echo("No users found.") - - -@cli.command() -@click.option( - "--filter", - "-f", - multiple=True, - help="Filters for deletion in the format key:operator:value", -) -@pass_context -async def delete(ctx, filter): - """Delete documents based on filters.""" - client = ctx.obj - filters = {} - for f in filter: - key, operator, value = f.split(":", 2) - if key not in filters: - filters[key] = {} - filters[key][f"${operator}"] = value - - with timer(): - response = await client.delete(filters=filters) - - click.echo(response) - - -@cli.command() -@click.option("--document-ids", multiple=True, help="Document IDs to overview") -@click.option( - "--offset", - default=None, - help="The offset to start from. Defaults to 0.", -) -@click.option( - "--limit", - default=None, - help="The maximum number of nodes to return. Defaults to 100.", -) -@pass_context -async def documents_overview(ctx, document_ids, offset, limit): - """Get an overview of documents.""" - client = ctx.obj - document_ids = list(document_ids) if document_ids else None - - with timer(): - response = await client.documents_overview(document_ids, offset, limit) - - for document in response["results"]: - click.echo(document) - - -@cli.command() -@click.option("--document-id", help="Document ID to retrieve chunks for") -@click.option( - "--offset", - default=None, - help="The offset to start from. Defaults to 0.", -) -@click.option( - "--limit", - default=None, - help="The maximum number of nodes to return. 
Defaults to 100.", -) -@click.option( - "--include-vectors", - is_flag=True, - default=False, - help="Should the vector be included in the response chunks", -) -@pass_context -async def document_chunks(ctx, document_id, offset, limit, include_vectors): - """Get chunks of a specific document.""" - client = ctx.obj - if not document_id: - click.echo("Error: Document ID is required.") - return - - with timer(): - chunks_data = await client.document_chunks( - document_id, offset, limit, include_vectors - ) - - chunks = chunks_data["results"] - if not chunks: - click.echo("No chunks found for the given document ID.") - return - - click.echo(f"\nNumber of chunks: {len(chunks)}") - - for index, chunk in enumerate(chunks, 1): - click.echo(f"\nChunk {index}:") - if isinstance(chunk, dict): - click.echo(f"Extraction ID: {chunk.get('extraction_id', 'N/A')}") - click.echo(f"Text: {chunk.get('text', '')[:100]}...") - click.echo(f"Metadata: {chunk.get('metadata', {})}") - if include_vectors: - click.echo(f"Vector: {chunk.get('vector', 'N/A')}") - else: - click.echo(f"Unexpected chunk format: {chunk}") diff --git a/py/cli/commands/prompts.py b/py/cli/commands/prompts.py new file mode 100644 index 000000000..07bf18be2 --- /dev/null +++ b/py/cli/commands/prompts.py @@ -0,0 +1,60 @@ +import json + +import asyncclick as click +from asyncclick import pass_context + +from cli.utils.timer import timer +from r2r import R2RAsyncClient + + +@click.group() +def prompts(): + """Prompts commands.""" + pass + + +@prompts.command() +@pass_context +async def list(ctx): + """Get an overview of prompts.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.prompts.list() + + for prompt in response["results"]: + click.echo(json.dumps(prompt, indent=2)) + + +@prompts.command() +@click.argument("name", type=str) +@click.option("--inputs", default=None, type=str) +@click.option("--prompt-override", default=None, type=str) +@pass_context +async def retrieve(ctx, name, inputs, prompt_override): + """Retrieve an prompts by name.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.prompts.retrieve( + name=name, + inputs=inputs, + prompt_override=prompt_override, + ) + + click.echo(json.dumps(response, indent=2)) + + +@prompts.command() +@click.argument("name", required=True, type=str) +@pass_context +async def delete(ctx, name): + """Delete an index by name.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.prompts.delete( + name=name, + ) + + click.echo(json.dumps(response, indent=2)) diff --git a/py/cli/commands/retrieval.py b/py/cli/commands/retrieval.py index f0d5604d4..b472cd3d6 100644 --- a/py/cli/commands/retrieval.py +++ b/py/cli/commands/retrieval.py @@ -1,165 +1,128 @@ +import json + import asyncclick as click from asyncclick import pass_context -from cli.command_group import cli from cli.utils.param_types import JSON from cli.utils.timer import timer +from r2r import R2RAsyncClient + +@click.group() +def retrieval(): + """Retrieval commands.""" + pass -@cli.command() + +@retrieval.command() @click.option( "--query", prompt="Enter your search query", help="The search query" ) -# SearchSettings -@click.option( - "--use-vector-search", - is_flag=True, - default=True, - help="Whether to use vector search", -) -@click.option( - "--filters", - type=JSON, - help="""Filters to apply to the vector search as a JSON, e.g. 
--filters='{"document_id":{"$in":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "3e157b3a-8469-51db-90d9-52e7d896b49b"]}}'""", -) @click.option( - "--search-limit", default=None, help="Number of search results to return" + "--limit", default=None, help="Number of search results to return" ) @click.option( - "--use-hybrid-search", is_flag=True, help="Perform hybrid search" + "--use-hybrid-search", + default=None, + help="Perform hybrid search? Equivalent to `use-semantic-search` and `use-fulltext-search`", ) @click.option( - "--selected-collection-ids", - type=JSON, - help="Collection IDs to search for as a JSON", + "--use-semantic-search", default=None, help="Perform semantic search?" ) -# KGSearchSettings @click.option( - "--use-kg-search", is_flag=True, help="Use knowledge graph search" + "--use-fulltext-search", default=None, help="Perform fulltext search?" ) -@click.option("--kg-search-type", default=None, help="Local or Global") -@click.option("--kg-search-level", default=None, help="Level of KG search") @click.option( - "--kg-search-generation-config", + "--filters", type=JSON, - help="KG search generation config", -) -@click.option( - "--entity-types", type=JSON, help="Entity types to search for as a JSON" + help="""Filters to apply to the vector search as a JSON, e.g. --filters='{"document_id":{"$in":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "3e157b3a-8469-51db-90d9-52e7d896b49b"]}}'""", ) @click.option( - "--relationships", type=JSON, help="Relationships to search for as a JSON" + "--search-strategy", + type=str, + default="vanilla", + help="Vanilla RAG or complex method like query fusion or HyDE.", ) @click.option( - "--max-community-description-length", - type=JSON, - help="Max community description length", + "--graph-search-enabled", default=None, help="Use knowledge graph search?" 
) @click.option( - "--search-strategy", - type=str, - help="Vanilla search or complex search method like query fusion or HyDE.", + "--chunk-search-enabled", + default=None, + help="Use search over document chunks?", ) -@click.option("--local-search-limits", type=JSON, help="Local search limits") @pass_context async def search(ctx, query, **kwargs): """Perform a search query.""" - client = ctx.obj - vector_search_settings = { + client: R2RAsyncClient = ctx.obj + search_settings = { k: v for k, v in kwargs.items() if k in [ - "use_vector_search", "filters", - "search_limit", + "limit", + "search_strategy", "use_hybrid_search", - "selected_collection_ids", + "use_semantic_search", + "use_fulltext_search", "search_strategy", ] and v is not None } + graph_search_enabled = kwargs.get("graph_search_enabled") + if graph_search_enabled != None: + search_settings["graph_settings"] = {"enabled": graph_search_enabled} - kg_search_settings = { - k: v - for k, v in kwargs.items() - if k - in [ - "use_kg_search", - "kg_search_type", - "kg_search_level", - "generation_config", - "entity_types", - "relationships", - "max_community_description_length", - "local_search_limits", - ] - and v is not None - } + chunk_search_enabled = kwargs.get("chunk_search_enabled") + if chunk_search_enabled != None: + search_settings["chunk_settings"] = {"enabled": chunk_search_enabled} with timer(): - results = await client.search( + results = await client.retrieval.search( query, - vector_search_settings, - kg_search_settings, + search_settings, ) if isinstance(results, dict) and "results" in results: results = results["results"] - if "vector_search_results" in results: + if "chunk_search_results" in results: click.echo("Vector search results:") - for result in results["vector_search_results"]: - click.echo(result) + for result in results["chunk_search_results"]: + click.echo(json.dumps(result, indent=2)) - if "kg_search_results" in results and results["kg_search_results"]: + if ( + "graph_search_results" in results + and results["graph_search_results"] + ): click.echo("KG search results:") - for result in results["kg_search_results"]: - click.echo(result) + for result in results["graph_search_results"]: + click.echo(json.dumps(result, indent=2)) -@cli.command() -@click.option("--query", prompt="Enter your query", help="The query for RAG") -# RAG Generation Config -@click.option("--stream", is_flag=True, help="Stream the RAG response") -@click.option("--rag-model", default=None, help="Model for RAG") -# Vector Search Settings -@click.option( - "--use-vector-search", is_flag=True, default=True, help="Use vector search" -) -@click.option("--filters", type=JSON, help="Search filters as JSON") -@click.option( - "--search-limit", default=10, help="Number of search results to return" -) -@click.option( - "--use-hybrid-search", is_flag=True, help="Perform hybrid search" -) +@retrieval.command() @click.option( - "--selected-collection-ids", - type=JSON, - help="Collection IDs to search for as a JSON", + "--query", prompt="Enter your search query", help="The search query" ) -# KG Search Settings @click.option( - "--use-kg-search", is_flag=True, help="Use knowledge graph search" + "--limit", default=None, help="Number of search results to return" ) -@click.option("--kg-search-type", default="local", help="Local or Global") @click.option( - "--kg-search-level", + "--use-hybrid-search", default=None, - help="Level of cluster to use for Global KG search", + help="Perform hybrid search? 
Equivalent to `use-semantic-search` and `use-fulltext-search`", ) -@click.option("--kg-search-model", default=None, help="Model for KG agent") @click.option( - "--entity-types", type=JSON, help="Entity types to search for as a JSON" + "--use-semantic-search", default=None, help="Perform semantic search?" ) @click.option( - "--relationships", type=JSON, help="Relationships to search for as a JSON" + "--use-fulltext-search", default=None, help="Perform fulltext search?" ) @click.option( - "--max-community-description-length", - type=int, - help="Max community description length", + "--filters", + type=JSON, + help="""Filters to apply to the vector search as a JSON, e.g. --filters='{"document_id":{"$in":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "3e157b3a-8469-51db-90d9-52e7d896b49b"]}}'""", ) @click.option( "--search-strategy", @@ -167,60 +130,54 @@ async def search(ctx, query, **kwargs): default="vanilla", help="Vanilla RAG or complex method like query fusion or HyDE.", ) -@click.option("--local-search-limits", type=JSON, help="Local search limits") +@click.option( + "--graph-search-enabled", default=None, help="Use knowledge graph search?" +) +@click.option( + "--chunk-search-enabled", + default=None, + help="Use search over document chunks?", +) +@click.option("--stream", is_flag=True, help="Stream the RAG response") +@click.option("--rag-model", default=None, help="Model for RAG") @pass_context async def rag(ctx, query, **kwargs): """Perform a RAG query.""" - client = ctx.obj + client: R2RAsyncClient = ctx.obj rag_generation_config = { "stream": kwargs.get("stream", False), } if kwargs.get("rag_model"): rag_generation_config["model"] = kwargs["rag_model"] - vector_search_settings = { + search_settings = { k: v for k, v in kwargs.items() if k in [ - "use_vector_search", "filters", - "search_limit", + "limit", + "search_strategy", "use_hybrid_search", - "selected_collection_ids", + "use_semantic_search", + "use_fulltext_search", "search_strategy", ] and v is not None } + graph_search_enabled = kwargs.get("graph_search_enabled") + if graph_search_enabled != None: + search_settings["graph_settings"] = {"enabled": graph_search_enabled} - kg_search_settings = { - k: v - for k, v in kwargs.items() - if k - in [ - "use_kg_search", - "kg_search_type", - "kg_search_level", - "kg_search_model", - "entity_types", - "relationships", - "max_community_description_length", - "local_search_limits", - ] - and v is not None - } - - if kg_search_settings.get("kg_search_model"): - kg_search_settings["generation_config"] = { - "model": kg_search_settings.pop("kg_search_model") - } + chunk_search_enabled = kwargs.get("chunk_search_enabled") + if chunk_search_enabled != None: + search_settings["chunk_settings"] = {"enabled": chunk_search_enabled} with timer(): - response = await client.rag( - query, - rag_generation_config, - vector_search_settings, - kg_search_settings, + response = await client.retrieval.rag( + query=query, + rag_generation_config=rag_generation_config, + search_settings={**search_settings}, ) if rag_generation_config.get("stream"): @@ -228,7 +185,4 @@ async def rag(ctx, query, **kwargs): click.echo(chunk, nl=False) click.echo() else: - click.echo(response) - - -# TODO: Implement agent + click.echo(json.dumps(response["results"]["completion"], indent=2)) diff --git a/py/cli/commands/server.py b/py/cli/commands/system.py similarity index 91% rename from py/cli/commands/server.py rename to py/cli/commands/system.py index 3d3a9e52a..10a7ac4ac 100644 --- a/py/cli/commands/server.py +++ 
b/py/cli/commands/system.py @@ -18,45 +18,43 @@ wait_for_container_health, ) from cli.utils.timer import timer +from r2r import R2RAsyncClient -@cli.command() -@pass_context -async def health(ctx): - """Check the health of the server.""" - client = ctx.obj - with timer(): - response = await client.health() - - click.echo(response) +@click.group() +def system(): + """System commands.""" + pass @cli.command() @pass_context -async def server_stats(ctx): - client = ctx.obj - """Check the server stats.""" +async def health(ctx): + """Check the health of the server.""" + client: R2RAsyncClient = ctx.obj with timer(): - response = await client.server_stats() + response = await client.system.health() - click.echo(response) + click.echo(json.dumps(response, indent=2)) -@cli.command() +@system.command() +@click.option("--run-type-filter", help="Filter for log types") @click.option( "--offset", default=None, help="Pagination offset. Default is None." ) @click.option( "--limit", default=None, help="Pagination limit. Defaults to 100." ) -@click.option("--run-type-filter", help="Filter for log types") @pass_context async def logs(ctx, run_type_filter, offset, limit): """Retrieve logs with optional type filter.""" - client = ctx.obj + client: R2RAsyncClient = ctx.obj with timer(): - response = await client.logs( - offset=offset, limit=limit, run_type_filter=run_type_filter + response = await client.system.logs( + run_type_filter=run_type_filter, + offset=offset, + limit=limit, ) for log in response["results"]: @@ -72,133 +70,26 @@ async def logs(ctx, run_type_filter, offset, limit): click.echo(f"Total runs: {len(response['results'])}") -@cli.command() -@click.option( - "--volumes", - is_flag=True, - help="Remove named volumes declared in the `volumes` section of the Compose file", -) -@click.option( - "--remove-orphans", - is_flag=True, - help="Remove containers for services not defined in the Compose file", -) -@click.option( - "--project-name", - default=None, - help="Which Docker Compose project to bring down", -) -def docker_down(volumes, remove_orphans, project_name): - """Bring down the Docker Compose setup and attempt to remove the network if necessary.""" - - if not project_name: - print("Bringing down the default R2R Docker setup(s)...") - try: - result = bring_down_docker_compose( - project_name or "r2r", volumes, remove_orphans - ) - except: - pass - try: - result = bring_down_docker_compose( - project_name or "r2r-full", volumes, remove_orphans - ) - except: - pass - else: - print(f"Bringing down the `{project_name}` R2R Docker setup...") - result = bring_down_docker_compose( - project_name, volumes, remove_orphans - ) - - if result != 0: - click.echo( - f"An error occurred while bringing down the {project_name} Docker Compose setup. Attempting to remove the network..." - ) - else: - click.echo( - f"{project_name} Docker Compose setup has been successfully brought down." 
- ) - remove_r2r_network() - - -@cli.command() -def generate_report(): - """Generate a system report including R2R version, Docker info, and OS details.""" - - # Get R2R version - from importlib.metadata import version - - report = {"r2r_version": version("r2r")} - - # Get Docker info - try: - subprocess.run( - ["docker", "version"], check=True, capture_output=True, timeout=5 - ) +@system.command() +@pass_context +async def settings(ctx): + """Retrieve application settings.""" + client: R2RAsyncClient = ctx.obj + with timer(): + response = await client.system.settings() - docker_ps_output = subprocess.check_output( - ["docker", "ps", "--format", "{{.ID}}\t{{.Names}}\t{{.Status}}"], - text=True, - timeout=5, - ).strip() - report["docker_ps"] = [ - dict(zip(["id", "name", "status"], line.split("\t"))) - for line in docker_ps_output.split("\n") - if line - ] + click.echo(json.dumps(response, indent=2)) - docker_network_output = subprocess.check_output( - ["docker", "network", "ls", "--format", "{{.ID}}\t{{.Name}}"], - text=True, - timeout=5, - ).strip() - networks = [ - dict(zip(["id", "name"], line.split("\t"))) - for line in docker_network_output.split("\n") - if line - ] - report["docker_subnets"] = [] - for network in networks: - inspect_output = subprocess.check_output( - [ - "docker", - "network", - "inspect", - network["id"], - "--format", - "{{range .IPAM.Config}}{{.Subnet}}{{end}}", - ], - text=True, - timeout=5, - ).strip() - if subnet := inspect_output: - network["subnet"] = subnet - report["docker_subnets"].append(network) - - except subprocess.CalledProcessError as e: - report["docker_error"] = f"Error running Docker command: {e}" - except FileNotFoundError: - report["docker_error"] = ( - "Docker command not found. Is Docker installed and in PATH?" - ) - except subprocess.TimeoutExpired: - report["docker_error"] = ( - "Docker command timed out. Docker might be unresponsive." 
- ) - - # Get OS information - report["os_info"] = { - "system": platform.system(), - "release": platform.release(), - "version": platform.version(), - "machine": platform.machine(), - "processor": platform.processor(), - } +@system.command() +@pass_context +async def status(ctx): + """Get statistics about the server, including the start time, uptime, CPU usage, and memory usage.""" + client: R2RAsyncClient = ctx.obj + with timer(): + response = await client.system.status() - click.echo("System Report:") - click.echo(json.dumps(report, indent=2)) + click.echo(json.dumps(response, indent=2)) @cli.command() @@ -381,6 +272,135 @@ def image_exists(img): await run_local_serve(host, port, config_name, config_path, full) +@cli.command() +@click.option( + "--volumes", + is_flag=True, + help="Remove named volumes declared in the `volumes` section of the Compose file", +) +@click.option( + "--remove-orphans", + is_flag=True, + help="Remove containers for services not defined in the Compose file", +) +@click.option( + "--project-name", + default=None, + help="Which Docker Compose project to bring down", +) +def docker_down(volumes, remove_orphans, project_name): + """Bring down the Docker Compose setup and attempt to remove the network if necessary.""" + + if not project_name: + print("Bringing down the default R2R Docker setup(s)...") + try: + result = bring_down_docker_compose( + project_name or "r2r", volumes, remove_orphans + ) + except: + pass + try: + result = bring_down_docker_compose( + project_name or "r2r-full", volumes, remove_orphans + ) + except: + pass + else: + print(f"Bringing down the `{project_name}` R2R Docker setup...") + result = bring_down_docker_compose( + project_name, volumes, remove_orphans + ) + + if result != 0: + click.echo( + f"An error occurred while bringing down the {project_name} Docker Compose setup. Attempting to remove the network..." + ) + else: + click.echo( + f"{project_name} Docker Compose setup has been successfully brought down." + ) + remove_r2r_network() + + +@cli.command() +def generate_report(): + """Generate a system report including R2R version, Docker info, and OS details.""" + + # Get R2R version + from importlib.metadata import version + + report = {"r2r_version": version("r2r")} + + # Get Docker info + try: + subprocess.run( + ["docker", "version"], check=True, capture_output=True, timeout=5 + ) + + docker_ps_output = subprocess.check_output( + ["docker", "ps", "--format", "{{.ID}}\t{{.Names}}\t{{.Status}}"], + text=True, + timeout=5, + ).strip() + report["docker_ps"] = [ + dict(zip(["id", "name", "status"], line.split("\t"))) + for line in docker_ps_output.split("\n") + if line + ] + + docker_network_output = subprocess.check_output( + ["docker", "network", "ls", "--format", "{{.ID}}\t{{.Name}}"], + text=True, + timeout=5, + ).strip() + networks = [ + dict(zip(["id", "name"], line.split("\t"))) + for line in docker_network_output.split("\n") + if line + ] + + report["docker_subnets"] = [] + for network in networks: + inspect_output = subprocess.check_output( + [ + "docker", + "network", + "inspect", + network["id"], + "--format", + "{{range .IPAM.Config}}{{.Subnet}}{{end}}", + ], + text=True, + timeout=5, + ).strip() + if subnet := inspect_output: + network["subnet"] = subnet + report["docker_subnets"].append(network) + + except subprocess.CalledProcessError as e: + report["docker_error"] = f"Error running Docker command: {e}" + except FileNotFoundError: + report["docker_error"] = ( + "Docker command not found. Is Docker installed and in PATH?" 
+ ) + except subprocess.TimeoutExpired: + report["docker_error"] = ( + "Docker command timed out. Docker might be unresponsive." + ) + + # Get OS information + report["os_info"] = { + "system": platform.system(), + "release": platform.release(), + "version": platform.version(), + "machine": platform.machine(), + "processor": platform.processor(), + } + + click.echo("System Report:") + click.echo(json.dumps(report, indent=2)) + + @cli.command() def update(): """Update the R2R package to the latest version.""" @@ -402,7 +422,7 @@ def update(): @cli.command() def version(): - """Print the version of R2R.""" + """Reports the SDK version.""" from importlib.metadata import version - click.echo(version("r2r")) + click.echo(json.dumps(version("r2r"), indent=2))
diff --git a/py/cli/commands/users.py b/py/cli/commands/users.py new file mode 100644 index 000000000..f8b2445e8 --- /dev/null +++ b/py/cli/commands/users.py @@ -0,0 +1,143 @@ +import json + +import asyncclick as click +from asyncclick import pass_context + +from cli.utils.timer import timer +from r2r import R2RAsyncClient + + +@click.group() +def users(): + """Users commands.""" + pass + + +@users.command() +@click.argument("email", required=True, type=str) +@click.argument("password", required=True, type=str) +@pass_context +async def register(ctx, email, password): + """Create a new user.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.users.register(email=email, password=password) + + click.echo(json.dumps(response, indent=2)) + + +@users.command() +@click.option("--ids", multiple=True, help="User IDs to fetch") +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. Defaults to 100.", +) +@pass_context +async def list(ctx, ids, offset, limit): + """Get an overview of users.""" + client: R2RAsyncClient = ctx.obj + ids = [*ids] if ids else None # avoid list(ids): the builtin is shadowed by this command's name + + with timer(): + response = await client.users.list( + ids=ids, + offset=offset, + limit=limit, + ) + + for user in response["results"]: + click.echo(json.dumps(user, indent=2)) + + +@users.command() +@click.argument("id", required=True, type=str) +@pass_context +async def retrieve(ctx, id): + """Retrieve a user by ID.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.users.retrieve(id=id) + + click.echo(json.dumps(response, indent=2)) + + +@users.command() +@pass_context +async def me(ctx): + """Retrieve the current user.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.users.me() + + click.echo(json.dumps(response, indent=2)) + + +@users.command() +@click.argument("id", required=True, type=str) +@click.option( + "--offset", + default=0, + help="The offset to start from. Defaults to 0.", +) +@click.option( + "--limit", + default=100, + help="The maximum number of nodes to return. 
Defaults to 100.", +) +@pass_context +async def list_collections(ctx, id, offset, limit): + """List collections for a specific user.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.users.list_collections( + id=id, + offset=offset, + limit=limit, + ) + + for collection in response["results"]: + click.echo(json.dumps(collection, indent=2)) + + +@users.command() +@click.argument("id", required=True, type=str) +@click.argument("collection_id", required=True, type=str) +@pass_context +async def add_to_collection(ctx, id, collection_id): + """Retrieve a user by ID.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.users.add_to_collection( + id=id, + collection_id=collection_id, + ) + + click.echo(json.dumps(response, indent=2)) + + +@users.command() +@click.argument("id", required=True, type=str) +@click.argument("collection_id", required=True, type=str) +@pass_context +async def remove_from_collection(ctx, id, collection_id): + """Retrieve a user by ID.""" + client: R2RAsyncClient = ctx.obj + + with timer(): + response = await client.users.remove_from_collection( + id=id, + collection_id=collection_id, + ) + + click.echo(json.dumps(response, indent=2)) diff --git a/py/cli/main.py b/py/cli/main.py index a1ea3020f..ac72befb4 100644 --- a/py/cli/main.py +++ b/py/cli/main.py @@ -1,12 +1,15 @@ from cli.command_group import cli from cli.commands import ( - auth, + collections, + conversations, database, - ingestion, - kg, - management, + documents, + graphs, + indices, + prompts, retrieval, - server, + system, + users, ) from cli.utils.telemetry import posthog, telemetry @@ -15,44 +18,22 @@ def add_command_with_telemetry(command): cli.add_command(telemetry(command)) -# Auth -add_command_with_telemetry(auth.generate_private_key) +# Chunks +add_command_with_telemetry(collections.collections) +add_command_with_telemetry(conversations.conversations) +add_command_with_telemetry(documents.documents) +add_command_with_telemetry(graphs.graphs) -# Ingestion -add_command_with_telemetry(ingestion.ingest_files) -add_command_with_telemetry(ingestion.update_files) -add_command_with_telemetry(ingestion.ingest_sample_file) -add_command_with_telemetry(ingestion.ingest_sample_files) -add_command_with_telemetry(ingestion.ingest_sample_files_from_unstructured) +# Graph +add_command_with_telemetry(indices.indices) +add_command_with_telemetry(prompts.prompts) +add_command_with_telemetry(retrieval.retrieval) +add_command_with_telemetry(users.users) +add_command_with_telemetry(system.system) -# Management -add_command_with_telemetry(management.analytics) -add_command_with_telemetry(management.app_settings) -add_command_with_telemetry(management.users_overview) -add_command_with_telemetry(management.documents_overview) -add_command_with_telemetry(management.document_chunks) - -# Knowledge Graph -add_command_with_telemetry(kg.create_graph) -add_command_with_telemetry(kg.enrich_graph) -add_command_with_telemetry(kg.deduplicate_entities) - -# Retrieval -add_command_with_telemetry(retrieval.search) -add_command_with_telemetry(retrieval.rag) - -# Server -add_command_with_telemetry(server.health) -add_command_with_telemetry(server.server_stats) -add_command_with_telemetry(server.logs) -add_command_with_telemetry(server.docker_down) -add_command_with_telemetry(server.generate_report) -add_command_with_telemetry(server.serve) -add_command_with_telemetry(server.update) -add_command_with_telemetry(server.version) # Database -add_command_with_telemetry(database.db) # Add 
the main db group +add_command_with_telemetry(database.db) add_command_with_telemetry(database.upgrade) add_command_with_telemetry(database.downgrade) add_command_with_telemetry(database.current) diff --git a/py/cli/utils/database_utils.py b/py/cli/utils/database_utils.py index 5fcb2aaa9..fb2f3088e 100644 --- a/py/cli/utils/database_utils.py +++ b/py/cli/utils/database_utils.py @@ -1,9 +1,8 @@ -import configparser import logging.config import os import sys from pathlib import Path -from typing import Dict, Optional +from typing import Optional import alembic.config import asyncclick as click @@ -12,7 +11,7 @@ from sqlalchemy.exc import OperationalError -def get_default_db_vars() -> Dict[str, str]: +def get_default_db_vars() -> dict[str, str]: """Get default database environment variables.""" return { "R2R_POSTGRES_HOST": "localhost", diff --git a/py/cli/utils/param_types.py b/py/cli/utils/param_types.py index 4916b1c28..f62553d65 100644 --- a/py/cli/utils/param_types.py +++ b/py/cli/utils/param_types.py @@ -1,5 +1,5 @@ import json -from typing import Any, Dict, Optional +from typing import Any, Optional import asyncclick as click @@ -7,7 +7,7 @@ class JsonParamType(click.ParamType): name = "json" - def convert(self, value, param, ctx) -> Optional[Dict[str, Any]]: + def convert(self, value, param, ctx) -> Optional[dict[str, Any]]: if value is None: return None if isinstance(value, dict): diff --git a/py/compose.full.yaml b/py/compose.full.yaml index 60379e5c2..bb72bc287 100644 --- a/py/compose.full.yaml +++ b/py/compose.full.yaml @@ -275,30 +275,30 @@ services: build: context: . args: - PORT: ${R2R_PORT:-${PORT:-7272}} # Eventually get rid of PORT, but for now keep it for backwards compatibility + PORT: ${R2R_PORT:-7272} R2R_PORT: ${R2R_PORT:-${PORT:-7272}} - HOST: ${R2R_HOST:-${HOST:-0.0.0.0}} # Eventually get rid of HOST, but for now keep it for backwards compatibility - R2R_HOST: ${R2R_HOST:-${HOST:-0.0.0.0}} + HOST: ${R2R_HOST:-0.0.0.0} + R2R_HOST: ${R2R_HOST:-0.0.0.0} ports: - - "${R2R_PORT:-${PORT:-7272}}:${R2R_PORT:-${PORT:-7272}}" + - "${R2R_PORT:-7272}:${R2R_PORT:-7272}" environment: - PYTHONUNBUFFERED=1 - - R2R_PORT=${R2R_PORT:-${PORT:-7272}} # Eventually get rid of PORT, but for now keep it for backwards compatibility - - R2R_HOST=${R2R_HOST:-${HOST:-0.0.0.0}} # Eventually get rid of HOST, but for now keep it for backwards compatibility + - R2R_PORT=${R2R_PORT:-7272} + - R2R_HOST=${R2R_HOST:-0.0.0.0} # R2R - - R2R_CONFIG_NAME=${R2R_CONFIG_NAME:-${CONFIG_NAME:-}} # Eventually get rid of CONFIG_NAME, but for now keep it for backwards compatibility - - R2R_CONFIG_PATH=${R2R_CONFIG_PATH:-${CONFIG_PATH:-}} # Eventually get rid of CONFIG_PATH, but for now keep it for backwards compatibility + - R2R_CONFIG_NAME=${R2R_CONFIG_NAME:-} + - R2R_CONFIG_PATH=${R2R_CONFIG_PATH:-} - R2R_PROJECT_NAME=${R2R_PROJECT_NAME:-r2r_default} # Postgres - - R2R_POSTGRES_USER=${R2R_POSTGRES_USER:-${POSTGRES_USER:-postgres}} # Eventually get rid of POSTGRES_USER, but for now keep it for backwards compatibility - - R2R_POSTGRES_PASSWORD=${R2R_POSTGRES_PASSWORD:-${POSTGRES_PASSWORD:-postgres}} # Eventually get rid of POSTGRES_PASSWORD, but for now keep it for backwards compatibility - - R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-${POSTGRES_HOST:-postgres}} # Eventually get rid of POSTGRES_HOST, but for now keep it for backwards compatibility - - R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-${POSTGRES_PORT:-5432}} # Eventually get rid of POSTGRES_PORT, but for now keep it for backwards compatibility - - 
R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-${POSTGRES_DBNAME:-postgres}} # Eventually get rid of POSTGRES_DBNAME, but for now keep it for backwards compatibility - - R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-${POSTGRES_MAX_CONNECTIONS:-1024}} # Eventually get rid of POSTGRES_MAX_CONNECTIONS, but for now keep it for backwards compatibility - - R2R_POSTGRES_PROJECT_NAME=${R2R_POSTGRES_PROJECT_NAME:-${POSTGRES_PROJECT_NAME:-r2r_default}} # Eventually get rid of POSTGRES_PROJECT_NAME, but for now keep it for backwards compatibility + - R2R_POSTGRES_USER=${R2R_POSTGRES_USER:-postgres} + - R2R_POSTGRES_PASSWORD=${R2R_POSTGRES_PASSWORD:-postgres} + - R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres} + - R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432} + - R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-postgres} + - R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024} + - R2R_POSTGRES_PROJECT_NAME=${R2R_POSTGRES_PROJECT_NAME:-r2r_default} # OpenAI - OPENAI_API_KEY=${OPENAI_API_KEY:-} @@ -358,7 +358,7 @@ services: networks: - r2r-network healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:${R2R_PORT:-7272}/v2/health"] + test: ["CMD", "curl", "-f", "http://localhost:${R2R_PORT:-7272}/v3/health"] interval: 6s timeout: 5s retries: 5 diff --git a/py/compose.yaml b/py/compose.yaml index 9f9bfbd58..1bb4f4ddd 100644 --- a/py/compose.yaml +++ b/py/compose.yaml @@ -41,30 +41,30 @@ services: build: context: . args: - PORT: ${R2R_PORT:-${PORT:-7272}} # Eventually get rid of PORT, but for now keep it for backwards compatibility - R2R_PORT: ${R2R_PORT:-${PORT:-7272}} - HOST: ${R2R_HOST:-${HOST:-0.0.0.0}} # Eventually get rid of HOST, but for now keep it for backwards compatibility - R2R_HOST: ${R2R_HOST:-${HOST:-0.0.0.0}} + PORT: ${R2R_PORT:-7272} + R2R_PORT: ${R2R_PORT:-7272} + HOST: ${R2R_HOST:-0.0.0.0} + R2R_HOST: ${R2R_HOST:-0.0.0.0} ports: - - "${R2R_PORT:-${PORT:-7272}}:${R2R_PORT:-${PORT:-7272}}" + - "${R2R_PORT:-7272}:${R2R_PORT:-7272}" environment: - PYTHONUNBUFFERED=1 - - R2R_PORT=${R2R_PORT:-${PORT:-7272}} # Eventually get rid of PORT, but for now keep it for backwards compatibility - - R2R_HOST=${R2R_HOST:-${HOST:-0.0.0.0}} # Eventually get rid of HOST, but for now keep it for backwards compatibility + - R2R_PORT=${R2R_PORT:-7272} + - R2R_HOST=${R2R_HOST:-0.0.0.0} # R2R - - R2R_CONFIG_NAME=${R2R_CONFIG_NAME:-${CONFIG_NAME:-}} # Eventually get rid of CONFIG_NAME, but for now keep it for backwards compatibility - - R2R_CONFIG_PATH=${R2R_CONFIG_PATH:-${CONFIG_PATH:-}} # Eventually get rid of CONFIG_PATH, but for now keep it for backwards compatibility + - R2R_CONFIG_NAME=${R2R_CONFIG_NAME:-} + - R2R_CONFIG_PATH=${R2R_CONFIG_PATH:-} - R2R_PROJECT_NAME=${R2R_PROJECT_NAME:-r2r_default} # Postgres - - R2R_POSTGRES_USER=${R2R_POSTGRES_USER:-${POSTGRES_USER:-postgres}} # Eventually get rid of POSTGRES_USER, but for now keep it for backwards compatibility - - R2R_POSTGRES_PASSWORD=${R2R_POSTGRES_PASSWORD:-${POSTGRES_PASSWORD:-postgres}} # Eventually get rid of POSTGRES_PASSWORD, but for now keep it for backwards compatibility - - R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-${POSTGRES_HOST:-postgres}} # Eventually get rid of POSTGRES_HOST, but for now keep it for backwards compatibility - - R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-${POSTGRES_PORT:-5432}} # Eventually get rid of POSTGRES_PORT, but for now keep it for backwards compatibility - - R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-${POSTGRES_DBNAME:-postgres}} # Eventually get rid of POSTGRES_DBNAME, but for now keep it for 
backwards compatibility - - R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-${POSTGRES_MAX_CONNECTIONS:-1024}} # Eventually get rid of POSTGRES_MAX_CONNECTIONS, but for now keep it for backwards compatibility - - R2R_POSTGRES_PROJECT_NAME=${R2R_POSTGRES_PROJECT_NAME:-${POSTGRES_PROJECT_NAME:-r2r_default}} # Eventually get rid of POSTGRES_PROJECT_NAME, but for now keep it for backwards compatibility + - R2R_POSTGRES_USER=${R2R_POSTGRES_USER:-postgres} + - R2R_POSTGRES_PASSWORD=${R2R_POSTGRES_PASSWORD:-postgres} + - R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres} + - R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432} + - R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-postgres} + - R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024} + - R2R_POSTGRES_PROJECT_NAME=${R2R_POSTGRES_PROJECT_NAME:-r2r_default} # OpenAI - OPENAI_API_KEY=${OPENAI_API_KEY:-} @@ -103,7 +103,7 @@ services: networks: - r2r-network healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:${R2R_PORT:-7272}/v2/health"] + test: ["CMD", "curl", "-f", "http://localhost:${R2R_PORT:-7272}/v3/health"] interval: 6s timeout: 5s retries: 5 diff --git a/py/core/__init__.py b/py/core/__init__.py index a0e654f97..616ebe0ac 100644 --- a/py/core/__init__.py +++ b/py/core/__init__.py @@ -1,7 +1,6 @@ import logging # Keep '*' imports for enhanced development velocity -# corresponding flake8 error codes are F403, F405 from .agent import * from .base import * from .main import * @@ -48,10 +47,9 @@ # Completion abstractions "MessageType", # Document abstractions - "DataType", "Document", - "DocumentExtraction", - "DocumentInfo", + "DocumentChunk", + "DocumentResponse", "IngestionStatus", "KGExtractionStatus", "KGEnrichmentStatus", @@ -65,7 +63,7 @@ # KG abstractions "Entity", "KGExtraction", - "Triple", + "Relationship", # LLM abstractions "GenerationConfig", "LLMChatCompletion", @@ -75,19 +73,20 @@ "Prompt", # Search abstractions "AggregateSearchResult", - "KGSearchResult", - "KGSearchSettings", - "VectorSearchResult", + "GraphSearchResult", + "ChunkSearchSettings", + "GraphSearchSettings", + "ChunkSearchResult", "SearchSettings", "HybridSearchSettings", # User abstractions "Token", "TokenData", - "UserStats", # Vector abstractions "Vector", "VectorEntry", "VectorType", + "IndexConfig", ## AGENT # Agent abstractions "Agent", @@ -98,9 +97,8 @@ "ToolResult", ## API # Auth Responses - "GenericMessageResponse", "TokenResponse", - "UserResponse", + "User", ## LOGGING # Basic types "RunType", @@ -151,12 +149,8 @@ "TextSplitter", "run_pipeline", "to_async_generator", - "generate_run_id", + "generate_id", "increment_version", - "EntityType", - "RelationshipType", - "format_entity_types", - "format_relations", "validate_uuid", ## MAIN ## R2R ABSTRACTIONS @@ -180,10 +174,6 @@ "R2RAgentFactory", # R2R Routers "AuthRouter", - "IngestionRouter", - "ManagementRouter", - "RetrievalRouter", - "KGRouter", ## R2R SERVICES "AuthService", "IngestionService", @@ -215,7 +205,7 @@ ## PIPES "SearchPipe", "EmbeddingPipe", - "KGTriplesExtractionPipe", + "KGExtractionPipe", "ParsingPipe", "QueryTransformPipe", "SearchRAGPipe", diff --git a/py/core/agent/rag.py b/py/core/agent/rag.py index 8bc56c419..403f89aa4 100644 --- a/py/core/agent/rag.py +++ b/py/core/agent/rag.py @@ -7,7 +7,7 @@ ) from core.base.abstractions import ( AggregateSearchResult, - KGSearchSettings, + GraphSearchSettings, SearchSettings, ) from core.base.agent import AgentConfig, Tool @@ -57,16 +57,14 @@ def search_tool(self) -> Tool: async def search( self, query: str, - 
vector_search_settings: SearchSettings, - kg_search_settings: KGSearchSettings, + search_settings: SearchSettings, *args, **kwargs, ) -> list[AggregateSearchResult]: response = await self.search_pipeline.run( to_async_generator([query]), state=None, - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, + search_settings=search_settings, ) return response diff --git a/py/core/base/__init__.py b/py/core/base/__init__.py index 32abbbca1..be447e208 100644 --- a/py/core/base/__init__.py +++ b/py/core/base/__init__.py @@ -16,10 +16,9 @@ # Completion abstractions "MessageType", # Document abstractions - "DataType", "Document", - "DocumentExtraction", - "DocumentInfo", + "DocumentChunk", + "DocumentResponse", "IngestionStatus", "KGExtractionStatus", "KGEnrichmentStatus", @@ -33,7 +32,11 @@ # KG abstractions "Entity", "KGExtraction", - "Triple", + "Relationship", + "Community", + "KGCreationSettings", + "KGEnrichmentSettings", + "KGRunType", # LLM abstractions "GenerationConfig", "LLMChatCompletion", @@ -43,24 +46,21 @@ "Prompt", # Search abstractions "AggregateSearchResult", - "KGSearchResult", - "KGSearchSettings", - "VectorSearchResult", + "GraphSearchResult", + "GraphSearchSettings", + "ChunkSearchSettings", + "ChunkSearchResult", "SearchSettings", "HybridSearchSettings", - # KG abstractions - "KGCreationSettings", - "KGEnrichmentSettings", - "KGRunType", # User abstractions "Token", "TokenData", - "UserStats", # Vector abstractions "Vector", "VectorEntry", "VectorType", "StorageResult", + "IndexConfig", ## AGENT # Agent abstractions "Agent", @@ -71,9 +71,8 @@ "ToolResult", ## API # Auth Responses - "GenericMessageResponse", "TokenResponse", - "UserResponse", + "User", ## LOGGING # Basic types "RunType", @@ -132,15 +131,10 @@ "format_search_results_for_stream", "validate_uuid", # ID generation - "generate_run_id", + "generate_id", "generate_document_id", "generate_extraction_id", "generate_default_user_collection_id", - "generate_collection_id_from_name", "generate_user_id", "increment_version", - "EntityType", - "RelationshipType", - "format_entity_types", - "format_relations", ] diff --git a/py/core/base/abstractions/__init__.py b/py/core/base/abstractions/__init__.py index b49220e3c..479772452 100644 --- a/py/core/base/abstractions/__init__.py +++ b/py/core/base/abstractions/__init__.py @@ -1,14 +1,15 @@ from shared.abstractions.base import AsyncSyncMeta, R2RSerializable, syncable from shared.abstractions.document import ( - DataType, Document, - DocumentExtraction, - DocumentInfo, + DocumentChunk, + DocumentResponse, DocumentType, IngestionStatus, KGEnrichmentStatus, KGExtractionStatus, RawChunk, + UnprocessedChunk, + UpdateChunk, ) from shared.abstractions.embedding import ( EmbeddingPurpose, @@ -20,20 +21,20 @@ ) from shared.abstractions.graph import ( Community, - CommunityInfo, - CommunityReport, Entity, - EntityLevel, - EntityType, + Graph, KGExtraction, - RelationshipType, - Triple, + Relationship, ) from shared.abstractions.ingestion import ( ChunkEnrichmentSettings, ChunkEnrichmentStrategy, ) from shared.abstractions.kg import ( + GraphBuildSettings, + GraphCommunitySettings, + GraphEntitySettings, + GraphRelationshipSettings, KGCreationSettings, KGEnrichmentSettings, KGEntityDeduplicationSettings, @@ -51,22 +52,23 @@ from shared.abstractions.prompt import Prompt from shared.abstractions.search import ( AggregateSearchResult, + ChunkSearchResult, + ChunkSearchSettings, + GraphSearchResult, + GraphSearchSettings, HybridSearchSettings, 
KGCommunityResult, KGEntityResult, KGGlobalResult, KGRelationshipResult, - KGSearchMethod, - KGSearchResult, KGSearchResultType, - KGSearchSettings, SearchSettings, - VectorSearchResult, ) -from shared.abstractions.user import Token, TokenData, UserStats +from shared.abstractions.user import Token, TokenData, User from shared.abstractions.vector import ( IndexArgsHNSW, IndexArgsIVFFlat, + IndexConfig, IndexMeasure, IndexMethod, StorageResult, @@ -86,15 +88,16 @@ # Completion abstractions "MessageType", # Document abstractions - "DataType", "Document", - "DocumentExtraction", - "DocumentInfo", + "DocumentChunk", + "DocumentResponse", "DocumentType", "IngestionStatus", "KGExtractionStatus", "KGEnrichmentStatus", "RawChunk", + "UnprocessedChunk", + "UpdateChunk", # Embedding abstractions "EmbeddingPurpose", "default_embedding_prefixes", @@ -103,13 +106,11 @@ "R2RException", # Graph abstractions "Entity", - "EntityType", - "RelationshipType", "Community", - "CommunityReport", "KGExtraction", - "Triple", - "EntityLevel", + "Relationship", + # Index abstractions + "IndexConfig", # LLM abstractions "GenerationConfig", "LLMChatCompletion", @@ -120,27 +121,31 @@ "Prompt", # Search abstractions "AggregateSearchResult", - "KGSearchResult", - "KGSearchMethod", + "GraphSearchResult", "KGSearchResultType", "KGEntityResult", "KGRelationshipResult", "KGCommunityResult", "KGGlobalResult", - "KGSearchSettings", - "VectorSearchResult", + "GraphSearchSettings", + "ChunkSearchSettings", + "ChunkSearchResult", "SearchSettings", "HybridSearchSettings", # KG abstractions "KGCreationSettings", "KGEnrichmentSettings", "KGEntityDeduplicationSettings", + "GraphBuildSettings", + "GraphEntitySettings", + "GraphRelationshipSettings", + "GraphCommunitySettings", "KGEntityDeduplicationType", "KGRunType", # User abstractions "Token", "TokenData", - "UserStats", + "User", # Vector abstractions "Vector", "VectorEntry", diff --git a/py/core/base/agent/agent.py b/py/core/base/agent/agent.py index 11eb272ff..6758512dd 100644 --- a/py/core/base/agent/agent.py +++ b/py/core/base/agent/agent.py @@ -2,7 +2,7 @@ import json import logging from abc import ABC, abstractmethod -from typing import Any, AsyncGenerator, Dict, List, Optional, Type, Union +from typing import Any, AsyncGenerator, Optional, Type from pydantic import BaseModel @@ -21,16 +21,16 @@ class Conversation: def __init__(self): - self.messages: List[Message] = [] + self.messages: list[Message] = [] self._lock = asyncio.Lock() def create_and_add_message( self, - role: Union[MessageType, str], + role: MessageType | str, content: Optional[str] = None, name: Optional[str] = None, - function_call: Optional[Dict[str, Any]] = None, - tool_calls: Optional[List[Dict[str, Any]]] = None, + function_call: Optional[dict[str, Any]] = None, + tool_calls: Optional[list[dict[str, Any]]] = None, ): message = Message( role=role, @@ -91,12 +91,17 @@ def _register_tools(self): pass async def _setup(self, system_instruction: Optional[str] = None): + content = system_instruction or ( + await self.database_provider.get_cached_prompt( + self.config.system_instruction_name + ) + ) await self.conversation.add_message( Message( role="system", content=system_instruction or ( - await self.database_provider.get_prompt( + await self.database_provider.get_cached_prompt( self.config.system_instruction_name ) ), @@ -118,9 +123,7 @@ async def arun( messages: Optional[list[Message]] = None, *args, **kwargs, - ) -> Union[ - list[LLMChatCompletion], AsyncGenerator[LLMChatCompletion, None] - ]: + ) -> 
list[LLMChatCompletion] | AsyncGenerator[LLMChatCompletion, None]: pass @abstractmethod @@ -129,7 +132,7 @@ async def process_llm_response( response: Any, *args, **kwargs, - ) -> Union[None, AsyncGenerator[str, None]]: + ) -> None | AsyncGenerator[str, None]: pass async def execute_tool(self, tool_name: str, *args, **kwargs) -> str: diff --git a/py/core/base/agent/base.py b/py/core/base/agent/base.py index 8b347f31e..0d8f15ee1 100644 --- a/py/core/base/agent/base.py +++ b/py/core/base/agent/base.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Optional from ..abstractions import R2RSerializable @@ -9,9 +9,10 @@ class Tool(R2RSerializable): results_function: Callable llm_format_function: Callable stream_function: Optional[Callable] = None - parameters: Optional[Dict[str, Any]] = None + parameters: Optional[dict[str, Any]] = None class Config: + populate_by_name = True arbitrary_types_allowed = True diff --git a/py/core/base/api/models/__init__.py b/py/core/base/api/models/__init__.py index 9b7590f42..a60e49c65 100644 --- a/py/core/base/api/models/__init__.py +++ b/py/core/base/api/models/__init__.py @@ -1,90 +1,87 @@ from shared.api.models.auth.responses import ( - GenericMessageResponse, TokenResponse, - UserResponse, - WrappedGenericMessageResponse, WrappedTokenResponse, - WrappedUserResponse, +) +from shared.api.models.base import ( + GenericBooleanResponse, + GenericMessageResponse, + PaginatedResultsWrapper, + ResultsWrapper, + WrappedBooleanResponse, + WrappedGenericMessageResponse, ) from shared.api.models.ingestion.responses import ( - CreateVectorIndexResponse, IngestionResponse, UpdateResponse, - WrappedCreateVectorIndexResponse, - WrappedDeleteVectorIndexResponse, WrappedIngestionResponse, WrappedListVectorIndicesResponse, WrappedMetadataUpdateResponse, - WrappedSelectVectorIndexResponse, WrappedUpdateResponse, ) -from shared.api.models.kg.responses import ( - KGCreationEstimationResponse, - KGCreationResponse, - KGDeduplicationEstimationResponse, - KGEnrichmentEstimationResponse, - KGEnrichmentResponse, - KGEntityDeduplicationResponse, - WrappedKGCommunitiesResponse, - WrappedKGCreationResponse, - WrappedKGEnrichmentResponse, - WrappedKGEntitiesResponse, - WrappedKGEntityDeduplicationResponse, - WrappedKGTriplesResponse, - WrappedKGTunePromptResponse, +from shared.api.models.kg.responses import ( # TODO: Need to review anything above this + Community, + Entity, + GraphResponse, + Relationship, + WrappedCommunitiesResponse, + WrappedCommunityResponse, + WrappedEntitiesResponse, + WrappedEntityResponse, + WrappedGraphResponse, + WrappedGraphsResponse, + WrappedRelationshipResponse, + WrappedRelationshipsResponse, ) -from shared.api.models.management.responses import ( +from shared.api.models.management.responses import ( # Document Responses; Prompt Responses; Chunk Responses; Conversation Responses; User Responses; TODO: anything below this hasn't been reviewed AnalyticsResponse, - AppSettingsResponse, - CollectionOverviewResponse, + ChunkResponse, CollectionResponse, - ConversationOverviewResponse, - DocumentChunkResponse, - DocumentOverviewResponse, + ConversationResponse, LogResponse, PromptResponse, - ScoreCompletionResponse, ServerStats, - UserOverviewResponse, - WrappedAddUserResponse, + SettingsResponse, + User, WrappedAnalyticsResponse, - WrappedAppSettingsResponse, - WrappedCollectionListResponse, - WrappedCollectionOverviewResponse, + WrappedBranchesResponse, + WrappedBranchResponse, + WrappedChunkResponse, + 
WrappedChunksResponse, WrappedCollectionResponse, + WrappedCollectionsResponse, + WrappedConversationMessagesResponse, WrappedConversationResponse, - WrappedConversationsOverviewResponse, - WrappedDeleteResponse, - WrappedDocumentChunkResponse, - WrappedDocumentOverviewResponse, - WrappedGetPromptsResponse, - WrappedLogResponse, - WrappedPromptMessageResponse, + WrappedConversationsResponse, + WrappedDocumentResponse, + WrappedDocumentsResponse, + WrappedLogsResponse, + WrappedMessageResponse, + WrappedMessagesResponse, + WrappedPromptResponse, + WrappedPromptsResponse, WrappedResetDataResult, WrappedServerStatsResponse, - WrappedUserCollectionResponse, - WrappedUserOverviewResponse, - WrappedUsersInCollectionResponse, + WrappedSettingsResponse, + WrappedUserResponse, + WrappedUsersResponse, WrappedVerificationResult, ) from shared.api.models.retrieval.responses import ( - RAGAgentResponse, + AgentResponse, + CombinedSearchResponse, RAGResponse, - SearchResponse, + WrappedAgentResponse, WrappedCompletionResponse, WrappedDocumentSearchResponse, - WrappedRAGAgentResponse, WrappedRAGResponse, WrappedSearchResponse, + WrappedVectorSearchResponse, ) __all__ = [ # Auth Responses - "GenericMessageResponse", "TokenResponse", - "UserResponse", "WrappedTokenResponse", - "WrappedUserResponse", "WrappedVerificationResult", "WrappedGenericMessageResponse", "WrappedResetDataResult", @@ -93,64 +90,74 @@ "WrappedIngestionResponse", "WrappedUpdateResponse", "WrappedMetadataUpdateResponse", - "CreateVectorIndexResponse", - "WrappedCreateVectorIndexResponse", "WrappedListVectorIndicesResponse", - "WrappedDeleteVectorIndexResponse", - "WrappedSelectVectorIndexResponse", "UpdateResponse", # Knowledge Graph Responses - "KGCreationResponse", - "WrappedKGCreationResponse", - "KGEnrichmentResponse", - "WrappedKGEnrichmentResponse", - "KGEntityDeduplicationResponse", - "WrappedKGEntityDeduplicationResponse", - "WrappedKGTunePromptResponse", - "KGCreationEstimationResponse", - "KGDeduplicationEstimationResponse", - "KGEnrichmentEstimationResponse", + "Entity", + "Relationship", + "Community", + "WrappedEntityResponse", + "WrappedEntitiesResponse", + "WrappedRelationshipResponse", + "WrappedRelationshipsResponse", + "WrappedCommunityResponse", + "WrappedCommunitiesResponse", + # TODO: Need to review anything above this + "GraphResponse", + "WrappedGraphResponse", + "WrappedGraphsResponse", # Management Responses "PromptResponse", "ServerStats", "LogResponse", "AnalyticsResponse", - "AppSettingsResponse", - "ScoreCompletionResponse", - "UserOverviewResponse", - "DocumentOverviewResponse", - "DocumentChunkResponse", + "SettingsResponse", + "ChunkResponse", "CollectionResponse", - "CollectionOverviewResponse", - "ConversationOverviewResponse", - "WrappedPromptMessageResponse", "WrappedServerStatsResponse", - "WrappedLogResponse", + "WrappedLogsResponse", "WrappedAnalyticsResponse", - "WrappedAppSettingsResponse", - "WrappedUserOverviewResponse", - "WrappedConversationResponse", - "WrappedDocumentChunkResponse", - "WrappedDocumentOverviewResponse", - "WrappedDocumentChunkResponse", + "WrappedSettingsResponse", + "WrappedDocumentResponse", + "WrappedDocumentsResponse", "WrappedCollectionResponse", - "WrappedDocumentChunkResponse", - "WrappedCollectionListResponse", - "WrappedAddUserResponse", - "WrappedUsersInCollectionResponse", - "WrappedGetPromptsResponse", - "WrappedUserCollectionResponse", - "WrappedDocumentChunkResponse", - "WrappedCollectionOverviewResponse", - "WrappedDeleteResponse", - 
"WrappedConversationsOverviewResponse", + "WrappedCollectionsResponse", + # Conversation Responses + "ConversationResponse", + "WrappedConversationMessagesResponse", + "WrappedConversationResponse", + "WrappedConversationsResponse", + # Prompt Responses + "WrappedPromptResponse", + "WrappedPromptsResponse", + # Conversation Responses + "WrappedMessageResponse", + "WrappedMessagesResponse", + "WrappedBranchResponse", + "WrappedBranchesResponse", + # Chunk Responses + "WrappedChunkResponse", + "WrappedChunksResponse", + # User Responses + "User", + "WrappedUserResponse", + "WrappedUsersResponse", + # Base Responses + "PaginatedResultsWrapper", + "ResultsWrapper", + "GenericBooleanResponse", + "GenericMessageResponse", + "WrappedBooleanResponse", + "WrappedGenericMessageResponse", + # TODO: This needs to be cleaned up # Retrieval Responses - "SearchResponse", + "CombinedSearchResponse", "RAGResponse", - "RAGAgentResponse", + "AgentResponse", "WrappedDocumentSearchResponse", "WrappedSearchResponse", + "WrappedVectorSearchResponse", "WrappedCompletionResponse", "WrappedRAGResponse", - "WrappedRAGAgentResponse", + "WrappedAgentResponse", ] diff --git a/py/core/base/logger/log_processor.py b/py/core/base/logger/log_processor.py index a3f4ede7a..994a9c5d9 100644 --- a/py/core/base/logger/log_processor.py +++ b/py/core/base/logger/log_processor.py @@ -3,7 +3,7 @@ import logging import statistics from collections import defaultdict -from typing import Any, Callable, Dict, List, Optional, Sequence +from typing import Any, Callable, Optional, Sequence from pydantic import BaseModel @@ -17,11 +17,11 @@ class LogFilterCriteria(BaseModel): class LogProcessor: timestamp_format = "%Y-%m-%d %H:%M:%S" - def __init__(self, filters: Dict[str, Callable[[Dict[str, Any]], bool]]): + def __init__(self, filters: dict[str, Callable[[dict[str, Any]], bool]]): self.filters = filters self.populations: dict = {name: [] for name in filters} - def process_log(self, log: Dict[str, Any]): + def process_log(self, log: dict[str, Any]): for name, filter_func in self.filters.items(): if filter_func(log): self.populations[name].append(log) @@ -30,9 +30,9 @@ def process_log(self, log: Dict[str, Any]): class StatisticsCalculator: @staticmethod def calculate_statistics( - population: List[Dict[str, Any]], - stat_functions: Dict[str, Callable[[List[Dict[str, Any]]], Any]], - ) -> Dict[str, Any]: + population: list[dict[str, Any]], + stat_functions: dict[str, Callable[[list[dict[str, Any]]], Any]], + ) -> dict[str, Any]: return { name: func(population) for name, func in stat_functions.items() } @@ -41,9 +41,9 @@ def calculate_statistics( class DistributionGenerator: @staticmethod def generate_distributions( - population: List[Dict[str, Any]], - dist_functions: Dict[str, Callable[[List[Dict[str, Any]]], Any]], - ) -> Dict[str, Any]: + population: list[dict[str, Any]], + dist_functions: dict[str, Callable[[list[dict[str, Any]]], Any]], + ) -> dict[str, Any]: return { name: func(population) for name, func in dist_functions.items() } @@ -52,9 +52,9 @@ def generate_distributions( class VisualizationPreparer: @staticmethod def prepare_visualization_data( - data: Dict[str, Any], - vis_functions: Dict[str, Callable[[Dict[str, Any]], Any]], - ) -> Dict[str, Any]: + data: dict[str, Any], + vis_functions: dict[str, Callable[[dict[str, Any]], Any]], + ) -> dict[str, Any]: return {name: func(data) for name, func in vis_functions.items()} @@ -162,7 +162,7 @@ def calculate_percentile(logs, key, percentile): class LogAnalytics: - def __init__(self, 
logs: List[Dict[str, Any]], config: LogAnalyticsConfig): + def __init__(self, logs: list[dict[str, Any]], config: LogAnalyticsConfig): self.logs = logs self.log_processor = LogProcessor(config.filters) self.statistics_calculator = StatisticsCalculator() @@ -170,14 +170,14 @@ def __init__(self, logs: List[Dict[str, Any]], config: LogAnalyticsConfig): self.visualization_preparer = VisualizationPreparer() self.config = config - def count_logs(self) -> Dict[str, Any]: + def count_logs(self) -> dict[str, Any]: """Count the logs for each filter.""" return { name: len(population) for name, population in self.log_processor.populations.items() } - def process_logs(self) -> Dict[str, Any]: + def process_logs(self) -> dict[str, Any]: for log in self.logs: self.log_processor.process_log(log) diff --git a/py/core/base/logger/run_manager.py b/py/core/base/logger/run_manager.py index 312068f61..29a9a0a58 100644 --- a/py/core/base/logger/run_manager.py +++ b/py/core/base/logger/run_manager.py @@ -4,13 +4,13 @@ from typing import Optional from uuid import UUID -from core.base.api.models import UserResponse +from core.base.api.models import User from core.base.logger.base import RunType -from core.base.utils import generate_run_id +from core.base.utils import generate_id from .base import PersistentLoggingProvider -run_id_var = contextvars.ContextVar("run_id", default=generate_run_id()) +run_id_var = contextvars.ContextVar("run_id", default=generate_id()) class RunManager: @@ -21,7 +21,7 @@ def __init__(self, logger: PersistentLoggingProvider): async def set_run_info(self, run_type: str, run_id: Optional[UUID] = None): run_id = run_id or run_id_var.get() if run_id is None: - run_id = generate_run_id() + run_id = generate_id() token = run_id_var.set(run_id) self.run_info[run_id] = {"run_type": run_type} else: @@ -35,7 +35,7 @@ async def get_info_logs(self): async def log_run_info( self, run_type: RunType, - user: UserResponse, + user: User, ): if asyncio.iscoroutine(user): user = await user diff --git a/py/core/base/providers/__init__.py b/py/core/base/providers/__init__.py index 37af2b8f3..1825ebcc1 100644 --- a/py/core/base/providers/__init__.py +++ b/py/core/base/providers/__init__.py @@ -2,19 +2,19 @@ from .base import AppConfig, Provider, ProviderConfig from .crypto import CryptoConfig, CryptoProvider from .database import ( - CollectionHandler, + ChunkHandler, + CollectionsHandler, DatabaseConfig, DatabaseConnectionManager, DatabaseProvider, DocumentHandler, FileHandler, - KGHandler, + GraphHandler, LoggingHandler, PostgresConfigurationSettings, PromptHandler, TokenHandler, UserHandler, - VectorHandler, ) from .email import EmailConfig, EmailProvider from .embedding import EmbeddingConfig, EmbeddingProvider @@ -43,12 +43,12 @@ # Database providers "DatabaseConnectionManager", "DocumentHandler", - "CollectionHandler", + "CollectionsHandler", "TokenHandler", "UserHandler", "LoggingHandler", - "VectorHandler", - "KGHandler", + "ChunkHandler", + "GraphHandler", "PromptHandler", "FileHandler", "DatabaseConfig", diff --git a/py/core/base/providers/auth.py b/py/core/base/providers/auth.py index fb23f7215..767d498fc 100644 --- a/py/core/base/providers/auth.py +++ b/py/core/base/providers/auth.py @@ -5,9 +5,8 @@ from fastapi import Security from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer -from ...utils import generate_user_id from ..abstractions import R2RException, Token, TokenData -from ..api.models import UserResponse +from ..api.models import User from .base import Provider, 
ProviderConfig from .crypto import CryptoProvider from .database import DatabaseProvider @@ -59,17 +58,8 @@ def __init__( super().__init__(config) self.config: AuthConfig = config # for type hinting - def _get_default_admin_user(self) -> UserResponse: - return UserResponse( - id=generate_user_id(self.admin_email), - email=self.admin_email, - hashed_password=self.crypto_provider.get_password_hash( - self.admin_password - ), - is_superuser=True, - is_active=True, - is_verified=True, - ) + async def _get_default_admin_user(self) -> User: + return await self.database_provider.get_user_by_email(self.admin_email) @abstractmethod def create_access_token(self, data: dict) -> str: @@ -84,17 +74,15 @@ async def decode_token(self, token: str) -> TokenData: pass @abstractmethod - async def user(self, token: str) -> UserResponse: + async def user(self, token: str) -> User: pass @abstractmethod - def get_current_active_user( - self, current_user: UserResponse - ) -> UserResponse: + def get_current_active_user(self, current_user: User) -> User: pass @abstractmethod - async def register(self, email: str, password: str) -> UserResponse: + async def register(self, email: str, password: str) -> User: pass @abstractmethod @@ -115,9 +103,9 @@ async def refresh_access_token( async def auth_wrapper( self, auth: Optional[HTTPAuthorizationCredentials] = Security(security) - ) -> UserResponse: + ) -> User: if not self.config.require_authentication and auth is None: - return self._get_default_admin_user() + return await self._get_default_admin_user() if auth is None: raise R2RException( @@ -135,7 +123,7 @@ async def auth_wrapper( @abstractmethod async def change_password( - self, user: UserResponse, current_password: str, new_password: str + self, user: User, current_password: str, new_password: str ) -> dict[str, str]: pass diff --git a/py/core/base/providers/base.py b/py/core/base/providers/base.py index b2cb39926..0dfa8c53f 100644 --- a/py/core/base/providers/base.py +++ b/py/core/base/providers/base.py @@ -23,6 +23,7 @@ class ProviderConfig(BaseModel, ABC): provider: Optional[str] = None class Config: + populate_by_name = True arbitrary_types_allowed = True ignore_extra = True diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index 83432c8f0..df57cad38 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -2,31 +2,16 @@ from abc import ABC, abstractmethod from datetime import datetime from io import BytesIO -from typing import ( - Any, - AsyncGenerator, - BinaryIO, - Dict, - List, - Optional, - Sequence, - Tuple, - Union, -) +from typing import Any, BinaryIO, Optional, Sequence, Tuple from uuid import UUID from pydantic import BaseModel -from core.base import ( - CommunityReport, - Entity, - KGExtraction, - Message, - Triple, - VectorEntry, -) from core.base.abstractions import ( - DocumentInfo, + ChunkSearchResult, + Community, + DocumentResponse, + Entity, IndexArgsHNSW, IndexArgsIVFFlat, IndexMeasure, @@ -34,22 +19,14 @@ KGCreationSettings, KGEnrichmentSettings, KGEntityDeduplicationSettings, + Message, + Relationship, SearchSettings, - UserStats, + User, VectorEntry, - VectorQuantizationType, - VectorSearchResult, VectorTableName, ) -from core.base.api.models import ( - CollectionOverviewResponse, - CollectionResponse, - KGCreationEstimationResponse, - KGDeduplicationEstimationResponse, - KGEnrichmentEstimationResponse, - UserResponse, -) -from core.base.utils import _decorate_vector_type +from core.base.api.models import 
CollectionResponse, GraphResponse from ..logger import RunInfoLog from ..logger.base import RunType @@ -63,15 +40,14 @@ from uuid import UUID from ..abstractions import ( - CommunityReport, + Community, Entity, + GraphSearchSettings, KGCreationSettings, KGEnrichmentSettings, KGEntityDeduplicationSettings, KGExtraction, - KGSearchSettings, - RelationshipType, - Triple, + Relationship, ) from .base import ProviderConfig @@ -137,12 +113,12 @@ class DatabaseConfig(ProviderConfig): # KG settings batch_size: Optional[int] = 1 kg_store_path: Optional[str] = None - kg_enrichment_settings: KGEnrichmentSettings = KGEnrichmentSettings() - kg_creation_settings: KGCreationSettings = KGCreationSettings() - kg_entity_deduplication_settings: KGEntityDeduplicationSettings = ( + graph_enrichment_settings: KGEnrichmentSettings = KGEnrichmentSettings() + graph_creation_settings: KGCreationSettings = KGCreationSettings() + graph_entity_deduplication_settings: KGEntityDeduplicationSettings = ( KGEntityDeduplicationSettings() ) - kg_search_settings: KGSearchSettings = KGSearchSettings() + graph_search_settings: GraphSearchSettings = GraphSearchSettings() def __post_init__(self): self.validate_config() @@ -164,7 +140,7 @@ class DatabaseConnectionManager(ABC): def execute_query( self, query: str, - params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, + params: Optional[dict[str, Any] | Sequence[Any]] = None, isolation_level: Optional[str] = None, ): pass @@ -177,7 +153,7 @@ async def execute_many(self, query, params=None, batch_size=1000): def fetch_query( self, query: str, - params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, + params: Optional[dict[str, Any] | Sequence[Any]] = None, ): pass @@ -185,7 +161,7 @@ def fetch_query( def fetchrow_query( self, query: str, - params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, + params: Optional[dict[str, Any] | Sequence[Any]] = None, ): pass @@ -196,7 +172,9 @@ async def initialize(self, pool: Any): class Handler(ABC): def __init__( - self, project_name: str, connection_manager: DatabaseConnectionManager + self, + project_name: str, + connection_manager: DatabaseConnectionManager, ): self.project_name = project_name self.connection_manager = connection_manager @@ -213,36 +191,44 @@ class DocumentHandler(Handler): @abstractmethod async def upsert_documents_overview( - self, documents_overview: Union[DocumentInfo, list[DocumentInfo]] + self, + documents_overview: DocumentResponse | list[DocumentResponse], ) -> None: pass @abstractmethod async def delete_from_documents_overview( - self, document_id: UUID, version: Optional[str] = None + self, + document_id: UUID, + version: Optional[str] = None, ) -> None: pass @abstractmethod async def get_documents_overview( self, + offset: int, + limit: int, filter_user_ids: Optional[list[UUID]] = None, filter_document_ids: Optional[list[UUID]] = None, filter_collection_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = -1, ) -> dict[str, Any]: pass @abstractmethod async def get_workflow_status( - self, id: Union[UUID, list[UUID]], status_type: str + self, + id: UUID | list[UUID], + status_type: str, ): pass @abstractmethod async def set_workflow_status( - self, id: Union[UUID, list[UUID]], status_type: str, status: str + self, + id: UUID | list[UUID], + status_type: str, + status: str, ): pass @@ -250,7 +236,7 @@ async def set_workflow_status( async def get_document_ids_by_status( self, status_type: str, - status: Union[str, list[str]], + status: str | list[str], collection_id: 
Optional[UUID] = None, ): pass @@ -261,17 +247,11 @@ async def search_documents( query_text: str, query_embedding: Optional[list[float]] = None, search_settings: Optional[SearchSettings] = None, - ) -> list[DocumentInfo]: + ) -> list[DocumentResponse]: pass -class CollectionHandler(Handler): - @abstractmethod - async def create_default_collection( - self, user_id: Optional[UUID] = None - ) -> CollectionResponse: - pass - +class CollectionsHandler(Handler): @abstractmethod async def collection_exists(self, collection_id: UUID) -> bool: pass @@ -279,16 +259,13 @@ async def collection_exists(self, collection_id: UUID) -> bool: @abstractmethod async def create_collection( self, - name: str, + owner_id: UUID, + name: Optional[str] = None, description: str = "", collection_id: Optional[UUID] = None, ) -> CollectionResponse: pass - @abstractmethod - async def get_collection(self, collection_id: UUID) -> CollectionResponse: - pass - @abstractmethod async def update_collection( self, @@ -302,38 +279,21 @@ async def update_collection( async def delete_collection_relational(self, collection_id: UUID) -> None: pass - @abstractmethod - async def list_collections( - self, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[CollectionResponse], int]]: - """List collections with pagination.""" - pass - - @abstractmethod - async def get_collections_by_ids( - self, collection_ids: list[UUID] - ) -> list[CollectionResponse]: - pass - @abstractmethod async def documents_in_collection( - self, collection_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[DocumentInfo], int]]: + self, collection_id: UUID, offset: int, limit: int + ) -> dict[str, list[DocumentResponse] | int]: pass @abstractmethod async def get_collections_overview( self, - collection_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict[str, Union[list[CollectionOverviewResponse], int]]: - pass - - @abstractmethod - async def get_collections_for_user( - self, user_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[CollectionResponse], int]]: + offset: int, + limit: int, + filter_user_ids: Optional[list[UUID]] = None, + filter_document_ids: Optional[list[UUID]] = None, + filter_collection_ids: Optional[list[UUID]] = None, + ) -> dict[str, list[CollectionResponse] | int]: pass @abstractmethod @@ -344,12 +304,6 @@ async def assign_document_to_collection_relational( ) -> UUID: pass - @abstractmethod - async def document_collections( - self, document_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[CollectionResponse], int]]: - pass - @abstractmethod async def remove_document_from_collection_relational( self, document_id: UUID, collection_id: UUID @@ -386,19 +340,21 @@ class UserHandler(Handler): TABLE_NAME = "users" @abstractmethod - async def get_user_by_id(self, user_id: UUID) -> UserResponse: + async def get_user_by_id(self, user_id: UUID) -> User: pass @abstractmethod - async def get_user_by_email(self, email: str) -> UserResponse: + async def get_user_by_email(self, email: str) -> User: pass @abstractmethod - async def create_user(self, email: str, password: str) -> UserResponse: + async def create_user( + self, email: str, password: str, is_superuser: bool + ) -> User: pass @abstractmethod - async def update_user(self, user: UserResponse) -> UserResponse: + async def update_user(self, user: User) -> User: pass @abstractmethod @@ -412,7 +368,7 @@ async def update_user_password( pass @abstractmethod - async def get_all_users(self) -> 
list[UserResponse]: + async def get_all_users(self) -> list[User]: pass @abstractmethod @@ -456,19 +412,19 @@ async def remove_user_from_all_collections(self, user_id: UUID): @abstractmethod async def add_user_to_collection( self, user_id: UUID, collection_id: UUID - ) -> None: + ) -> bool: pass @abstractmethod async def remove_user_from_collection( self, user_id: UUID, collection_id: UUID - ) -> None: + ) -> bool: pass @abstractmethod async def get_users_in_collection( - self, collection_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[UserResponse], int]]: + self, collection_id: UUID, offset: int, limit: int + ) -> dict[str, list[User] | int]: pass @abstractmethod @@ -488,15 +444,16 @@ async def mark_user_as_verified(self, user_id: UUID): @abstractmethod async def get_users_overview( self, + offset: int, + limit: int, user_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict[str, Union[list[UserStats], int]]: + ) -> dict[str, list[User] | int]: pass @abstractmethod async def get_user_validation_data( - self, user_id: UUID, *args, **kwargs + self, + user_id: UUID, ) -> dict: """ Get verification data for a specific user. @@ -505,7 +462,7 @@ async def get_user_validation_data( pass -class VectorHandler(Handler): +class ChunkHandler(Handler): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) @@ -520,13 +477,13 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: @abstractmethod async def semantic_search( self, query_vector: list[float], search_settings: SearchSettings - ) -> list[VectorSearchResult]: + ) -> list[ChunkSearchResult]: pass @abstractmethod async def full_text_search( self, query_text: str, search_settings: SearchSettings - ) -> list[VectorSearchResult]: + ) -> list[ChunkSearchResult]: pass @abstractmethod @@ -537,7 +494,7 @@ async def hybrid_search( search_settings: SearchSettings, *args, **kwargs, - ) -> list[VectorSearchResult]: + ) -> list[ChunkSearchResult]: pass @abstractmethod @@ -567,17 +524,17 @@ async def delete_collection_vector(self, collection_id: UUID) -> None: pass @abstractmethod - async def get_document_chunks( + async def list_document_chunks( self, document_id: UUID, - offset: int = 0, - limit: int = -1, + offset: int, + limit: int, include_vectors: bool = False, ) -> dict[str, Any]: pass @abstractmethod - async def get_chunk(self, extraction_id: UUID) -> Optional[dict[str, Any]]: + async def get_chunk(self, chunk_id: UUID) -> dict: pass @abstractmethod @@ -586,9 +543,7 @@ async def create_index( table_name: Optional[VectorTableName] = None, index_measure: IndexMeasure = IndexMeasure.cosine_distance, index_method: IndexMethod = IndexMethod.auto, - index_arguments: Optional[ - Union[IndexArgsIVFFlat, IndexArgsHNSW] - ] = None, + index_arguments: Optional[IndexArgsIVFFlat | IndexArgsHNSW] = None, index_name: Optional[str] = None, index_column: Optional[str] = None, concurrently: bool = True, @@ -597,8 +552,8 @@ async def create_index( @abstractmethod async def list_indices( - self, table_name: Optional[VectorTableName] = None - ) -> list[dict]: + self, offset: int, limit: int, filters: Optional[dict] = None + ) -> dict: pass @abstractmethod @@ -613,267 +568,112 @@ async def delete_index( @abstractmethod async def get_semantic_neighbors( self, + offset: int, + limit: int, document_id: UUID, chunk_id: UUID, - limit: int = 10, similarity_threshold: float = 0.5, ) -> list[dict[str, Any]]: pass - -class KGHandler(Handler): - """Base handler for Knowledge Graph operations.""" - 
- @abstractmethod - async def create_tables(self) -> None: - """Create required database tables.""" - pass - @abstractmethod - async def add_kg_extractions( + async def list_chunks( self, - kg_extractions: list[KGExtraction], - table_prefix: str = "chunk_", - ) -> Tuple[int, int]: - """Add KG extractions to storage.""" + offset: int, + limit: int, + filters: Optional[dict[str, Any]] = None, + include_vectors: bool = False, + ) -> dict[str, Any]: pass - @abstractmethod - async def add_entities( - self, - entities: list[Entity], - table_name: str, - conflict_columns: list[str] = [], - ) -> Any: - """Add entities to storage.""" - pass - @abstractmethod - async def add_triples( - self, - triples: list[Triple], - table_name: str = "chunk_triple", - ) -> None: - """Add triples to storage.""" - pass +class EntityHandler(Handler): @abstractmethod - async def get_entity_map( - self, offset: int, limit: int, document_id: UUID - ) -> Dict[str, Dict[str, List[Dict[str, Any]]]]: - """Get entity map for a document.""" + async def create(self, *args: Any, **kwargs: Any) -> Entity: + """Create entities in storage.""" pass @abstractmethod - async def upsert_embeddings( - self, - data: List[Tuple[Any]], - table_name: str, - ) -> None: - """Upsert embeddings into storage.""" + async def get(self, *args: Any, **kwargs: Any) -> list[Entity]: + """Get entities from storage.""" pass @abstractmethod - async def vector_query( - self, query: str, **kwargs: Any - ) -> AsyncGenerator[Any, None]: - """Perform vector similarity search.""" + async def update(self, *args: Any, **kwargs: Any) -> Entity: + """Update entities in storage.""" pass - # Community management @abstractmethod - async def add_community_info(self, communities: List[Any]) -> None: - """Add communities to storage.""" + async def delete(self, *args: Any, **kwargs: Any) -> None: + """Delete entities from storage.""" pass - @abstractmethod - async def get_communities( - self, - collection_id: Optional[UUID] = None, - levels: Optional[list[int]] = None, - community_numbers: Optional[list[int]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict: - """Get communities for a collection.""" - pass +class RelationshipHandler(Handler): @abstractmethod - async def add_community_report( - self, community_report: CommunityReport - ) -> None: - """Add a community report.""" + async def create(self, *args: Any, **kwargs: Any) -> Relationship: + """Add relationships to storage.""" pass @abstractmethod - async def get_community_details( - self, community_number: int, collection_id: UUID - ) -> Tuple[int, list[Entity], list[Triple]]: - """Get detailed information about a community.""" + async def get(self, *args: Any, **kwargs: Any) -> list[Relationship]: + """Get relationships from storage.""" pass @abstractmethod - async def get_community_reports( - self, collection_id: UUID - ) -> List[CommunityReport]: - """Get community reports for a collection.""" + async def update(self, *args: Any, **kwargs: Any) -> Relationship: + """Update relationships in storage.""" pass @abstractmethod - async def check_community_reports_exist( - self, collection_id: UUID, offset: int, limit: int - ) -> List[int]: - """Check which community reports exist.""" + async def delete(self, *args: Any, **kwargs: Any) -> None: + """Delete relationships from storage.""" pass - @abstractmethod - async def perform_graph_clustering( - self, - collection_id: UUID, - leiden_params: Dict[str, Any], - ) -> int: - """Perform graph clustering.""" - pass - # Graph operations +class 
CommunityHandler(Handler): @abstractmethod - async def delete_graph_for_collection( - self, collection_id: UUID, cascade: bool = False - ) -> None: - """Delete graph data for a collection.""" + async def create(self, *args: Any, **kwargs: Any) -> Community: + """Create communities in storage.""" pass @abstractmethod - async def delete_node_via_document_id( - self, document_id: UUID, collection_id: UUID - ) -> None: - """Delete a node using document ID.""" + async def get(self, *args: Any, **kwargs: Any) -> list[Community]: + """Get communities from storage.""" pass - # Entity and Triple management @abstractmethod - async def get_entities( - self, - collection_id: Optional[UUID] = None, - entity_ids: Optional[List[str]] = None, - entity_names: Optional[List[str]] = None, - entity_table_name: str = "document_entity", - extra_columns: Optional[List[str]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict: - """Get entities from storage.""" + async def update(self, *args: Any, **kwargs: Any) -> Community: + """Update communities in storage.""" pass @abstractmethod - async def get_triples( - self, - collection_id: Optional[UUID] = None, - entity_names: Optional[List[str]] = None, - triple_ids: Optional[List[str]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict: - """Get triples from storage.""" + async def delete(self, *args: Any, **kwargs: Any) -> None: + """Delete communities from storage.""" pass - @abstractmethod - async def get_entity_count( - self, - collection_id: Optional[UUID] = None, - document_id: Optional[UUID] = None, - distinct: bool = False, - entity_table_name: str = "document_entity", - ) -> int: - """Get entity count.""" - pass - @abstractmethod - async def get_triple_count( - self, - collection_id: Optional[UUID] = None, - document_id: Optional[UUID] = None, - ) -> int: - """Get triple count.""" - pass +class GraphHandler(Handler): - # Cost estimation methods - @abstractmethod - async def get_creation_estimate( - self, collection_id: UUID, kg_creation_settings: KGCreationSettings - ) -> KGCreationEstimationResponse: - """Get creation cost estimate.""" - pass + def __init__(self, *args: Any, **kwargs: Any) -> None: + super().__init__(*args, **kwargs) @abstractmethod - async def get_enrichment_estimate( - self, collection_id: UUID, kg_enrichment_settings: KGEnrichmentSettings - ) -> KGEnrichmentEstimationResponse: - """Get enrichment cost estimate.""" + async def create(self, *args: Any, **kwargs: Any) -> GraphResponse: + """Create graph""" pass @abstractmethod - async def get_deduplication_estimate( + async def update( self, - collection_id: UUID, - kg_deduplication_settings: KGEntityDeduplicationSettings, - ) -> KGDeduplicationEstimationResponse: - """Get deduplication cost estimate.""" + graph_id: UUID, + name: Optional[str], + description: Optional[str], + ) -> GraphResponse: + """Update graph""" pass - # Other operations - @abstractmethod - async def create_vector_index(self) -> None: - """Create vector index.""" - raise NotImplementedError - - @abstractmethod - async def delete_triples(self, triple_ids: list[int]) -> None: - """Delete triples.""" - raise NotImplementedError - - @abstractmethod - async def get_schema(self) -> Any: - """Get schema.""" - raise NotImplementedError - - @abstractmethod - async def structured_query(self) -> Any: - """Perform structured query.""" - raise NotImplementedError - - @abstractmethod - async def update_extraction_prompt(self) -> None: - """Update extraction prompt.""" - raise NotImplementedError - - @abstractmethod - 
async def update_kg_search_prompt(self) -> None: - """Update KG search prompt.""" - raise NotImplementedError - - @abstractmethod - async def upsert_triples(self) -> None: - """Upsert triples.""" - raise NotImplementedError - - @abstractmethod - async def get_existing_entity_extraction_ids( - self, document_id: UUID - ) -> list[str]: - """Get existing entity extraction IDs.""" - raise NotImplementedError - - @abstractmethod - async def get_all_triples( - self, collection_id: UUID, document_ids: Optional[list[UUID]] = None - ) -> List[Triple]: - raise NotImplementedError - - @abstractmethod - async def update_entity_descriptions(self, entities: list[Entity]): - raise NotImplementedError - class PromptHandler(Handler): """Abstract base class for prompt handling operations.""" @@ -885,6 +685,16 @@ async def add_prompt( """Add a new prompt template to the database.""" pass + @abstractmethod + async def get_cached_prompt( + self, + prompt_name: str, + inputs: Optional[dict[str, Any]] = None, + prompt_override: Optional[str] = None, + ) -> str: + """Retrieve and format a prompt template.""" + pass + @abstractmethod async def get_prompt( self, @@ -972,10 +782,10 @@ async def delete_file(self, document_id: UUID) -> bool: @abstractmethod async def get_files_overview( self, + offset: int, + limit: int, filter_document_ids: Optional[list[UUID]] = None, filter_file_names: Optional[list[str]] = None, - offset: int = 0, - limit: int = 100, ) -> list[dict]: """Get an overview of stored files.""" pass @@ -1004,25 +814,25 @@ async def info_log( @abstractmethod async def get_logs( - self, run_ids: List[UUID], limit_per_run: int = 10 - ) -> List[Dict]: + self, run_ids: list[UUID], limit_per_run: int = 10 + ) -> list[dict]: """Retrieve logs for specified run IDs.""" pass @abstractmethod async def get_info_logs( self, - offset: int = 0, - limit: int = 100, + offset: int, + limit: int, run_type_filter: Optional[RunType] = None, - user_ids: Optional[List[UUID]] = None, - ) -> List[RunInfoLog]: + user_ids: Optional[list[UUID]] = None, + ) -> list[RunInfoLog]: """Retrieve run information logs with filtering options.""" pass # Conversation management methods @abstractmethod - async def create_conversation(self) -> str: + async def create_conversation(self) -> dict: """Create a new conversation and return its ID.""" pass @@ -1032,12 +842,12 @@ async def delete_conversation(self, conversation_id: str) -> None: pass @abstractmethod - async def get_conversations_overview( + async def get_conversations( self, - conversation_ids: Optional[List[UUID]] = None, - offset: int = 0, - limit: int = -1, - ) -> Dict[str, Union[List[Dict], int]]: + offset: int, + limit: int, + conversation_ids: Optional[list[UUID]] = None, + ) -> dict[str, list[dict] | int]: """Get an overview of conversations with pagination.""" pass @@ -1048,7 +858,7 @@ async def add_message( conversation_id: str, content: Message, parent_id: Optional[str] = None, - metadata: Optional[Dict] = None, + metadata: Optional[dict] = None, ) -> str: """Add a message to a conversation.""" pass @@ -1063,13 +873,13 @@ async def edit_message( @abstractmethod async def get_conversation( self, conversation_id: str, branch_id: Optional[str] = None - ) -> List[Tuple[str, Message]]: + ) -> list[Tuple[str, Message]]: """Retrieve all messages in a conversation branch.""" pass # Branch management methods @abstractmethod - async def get_branches_overview(self, conversation_id: str) -> List[Dict]: + async def get_branches(self, conversation_id: str) -> list[dict]: """Get an overview 
of all branches in a conversation.""" pass @@ -1092,11 +902,13 @@ async def branch_at_message(self, message_id: str) -> str: class DatabaseProvider(Provider): connection_manager: DatabaseConnectionManager document_handler: DocumentHandler - collection_handler: CollectionHandler + collections_handler: CollectionsHandler token_handler: TokenHandler user_handler: UserHandler - vector_handler: VectorHandler - kg_handler: KGHandler + vector_handler: ChunkHandler + entity_handler: EntityHandler + relationship_handler: RelationshipHandler + graph_handler: GraphHandler prompt_handler: PromptHandler file_handler: FileHandler logging_handler: LoggingHandler @@ -1117,7 +929,8 @@ async def __aexit__(self, exc_type, exc, tb): # Document handler methods async def upsert_documents_overview( - self, documents_overview: Union[DocumentInfo, list[DocumentInfo]] + self, + documents_overview: DocumentResponse | list[DocumentResponse], ) -> None: return await self.document_handler.upsert_documents_overview( documents_overview @@ -1132,28 +945,28 @@ async def delete_from_documents_overview( async def get_documents_overview( self, + offset: int, + limit: int, filter_user_ids: Optional[list[UUID]] = None, filter_document_ids: Optional[list[UUID]] = None, filter_collection_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = -1, ) -> dict[str, Any]: return await self.document_handler.get_documents_overview( - filter_user_ids, - filter_document_ids, - filter_collection_ids, - offset, - limit, + offset=offset, + limit=limit, + filter_user_ids=filter_user_ids, + filter_document_ids=filter_document_ids, + filter_collection_ids=filter_collection_ids, ) async def get_workflow_status( - self, id: Union[UUID, list[UUID]], status_type: str + self, id: UUID | list[UUID], status_type: str ): return await self.document_handler.get_workflow_status(id, status_type) async def set_workflow_status( self, - id: Union[UUID, list[UUID]], + id: UUID | list[UUID], status_type: str, status: str, ): @@ -1164,7 +977,7 @@ async def set_workflow_status( async def get_document_ids_by_status( self, status_type: str, - status: Union[str, list[str]], + status: str | list[str], collection_id: Optional[UUID] = None, ): return await self.document_handler.get_document_ids_by_status( @@ -1172,76 +985,59 @@ async def get_document_ids_by_status( ) # Collection handler methods - async def create_default_collection( - self, user_id: Optional[UUID] = None - ) -> CollectionResponse: - return await self.collection_handler.create_default_collection(user_id) - async def collection_exists(self, collection_id: UUID) -> bool: - return await self.collection_handler.collection_exists(collection_id) + return await self.collections_handler.collection_exists(collection_id) async def create_collection( self, - name: str, + owner_id: UUID, + name: Optional[str] = None, description: str = "", collection_id: Optional[UUID] = None, ) -> CollectionResponse: - return await self.collection_handler.create_collection( - name, description, collection_id + return await self.collections_handler.create_collection( + owner_id=owner_id, + name=name, + description=description, + collection_id=collection_id, ) - async def get_collection(self, collection_id: UUID) -> CollectionResponse: - return await self.collection_handler.get_collection(collection_id) - async def update_collection( self, collection_id: UUID, name: Optional[str] = None, description: Optional[str] = None, ) -> CollectionResponse: - return await self.collection_handler.update_collection( + return await 
self.collections_handler.update_collection( collection_id, name, description ) async def delete_collection_relational(self, collection_id: UUID) -> None: - return await self.collection_handler.delete_collection_relational( + return await self.collections_handler.delete_collection_relational( collection_id ) - async def list_collections( - self, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[CollectionResponse], int]]: - return await self.collection_handler.list_collections(offset, limit) - - async def get_collections_by_ids( - self, collection_ids: list[UUID] - ) -> list[CollectionResponse]: - return await self.collection_handler.get_collections_by_ids( - collection_ids - ) - async def documents_in_collection( - self, collection_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[DocumentInfo], int]]: - return await self.collection_handler.documents_in_collection( + self, collection_id: UUID, offset: int, limit: int + ) -> dict[str, list[DocumentResponse] | int]: + return await self.collections_handler.documents_in_collection( collection_id, offset, limit ) async def get_collections_overview( self, - collection_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict[str, Union[list[CollectionOverviewResponse], int]]: - return await self.collection_handler.get_collections_overview( - collection_ids, offset, limit - ) - - async def get_collections_for_user( - self, user_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[CollectionResponse], int]]: - return await self.collection_handler.get_collections_for_user( - user_id, offset, limit + offset: int, + limit: int, + filter_user_ids: Optional[list[UUID]] = None, + filter_document_ids: Optional[list[UUID]] = None, + filter_collection_ids: Optional[list[UUID]] = None, + ) -> dict[str, list[CollectionResponse] | int]: + return await self.collections_handler.get_collections_overview( + offset=offset, + limit=limit, + filter_user_ids=filter_user_ids, + filter_document_ids=filter_document_ids, + filter_collection_ids=filter_collection_ids, ) async def assign_document_to_collection_relational( @@ -1249,21 +1045,15 @@ async def assign_document_to_collection_relational( document_id: UUID, collection_id: UUID, ) -> UUID: - return await self.collection_handler.assign_document_to_collection_relational( - document_id, collection_id - ) - - async def document_collections( - self, document_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[CollectionResponse], int]]: - return await self.collection_handler.document_collections( - document_id, offset, limit + return await self.collections_handler.assign_document_to_collection_relational( + document_id=document_id, + collection_id=collection_id, ) async def remove_document_from_collection_relational( self, document_id: UUID, collection_id: UUID ) -> None: - return await self.collection_handler.remove_document_from_collection_relational( + return await self.collections_handler.remove_document_from_collection_relational( document_id, collection_id ) @@ -1286,16 +1076,22 @@ async def clean_expired_blacklisted_tokens( ) # User handler methods - async def get_user_by_id(self, user_id: UUID) -> UserResponse: + async def get_user_by_id(self, user_id: UUID) -> User: return await self.user_handler.get_user_by_id(user_id) - async def get_user_by_email(self, email: str) -> UserResponse: + async def get_user_by_email(self, email: str) -> User: return await self.user_handler.get_user_by_email(email) - async def 
create_user(self, email: str, password: str) -> UserResponse: - return await self.user_handler.create_user(email, password) + async def create_user( + self, email: str, password: str, is_superuser: bool = False + ) -> User: + return await self.user_handler.create_user( + email=email, + password=password, + is_superuser=is_superuser, + ) - async def update_user(self, user: UserResponse) -> UserResponse: + async def update_user(self, user: User) -> User: return await self.user_handler.update_user(user) async def delete_user_relational(self, user_id: UUID) -> None: @@ -1308,7 +1104,7 @@ async def update_user_password( user_id, new_hashed_password ) - async def get_all_users(self) -> list[UserResponse]: + async def get_all_users(self) -> list[User]: return await self.user_handler.get_all_users() async def store_verification_code( @@ -1351,21 +1147,21 @@ async def remove_user_from_all_collections(self, user_id: UUID): async def add_user_to_collection( self, user_id: UUID, collection_id: UUID - ) -> None: + ) -> bool: return await self.user_handler.add_user_to_collection( user_id, collection_id ) async def remove_user_from_collection( self, user_id: UUID, collection_id: UUID - ) -> None: + ) -> bool: return await self.user_handler.remove_user_from_collection( user_id, collection_id ) async def get_users_in_collection( - self, collection_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[UserResponse], int]]: + self, collection_id: UUID, offset: int, limit: int + ) -> dict[str, list[User] | int]: return await self.user_handler.get_users_in_collection( collection_id, offset, limit ) @@ -1385,18 +1181,23 @@ async def mark_user_as_verified(self, user_id: UUID): async def get_users_overview( self, + offset: int, + limit: int, user_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict[str, Union[list[UserStats], int]]: + ) -> dict[str, list[User] | int]: return await self.user_handler.get_users_overview( - user_ids, offset, limit + offset=offset, + limit=limit, + user_ids=user_ids, ) async def get_user_validation_data( - self, user_id: UUID, *args, **kwargs + self, + user_id: UUID, ) -> dict: - return await self.user_handler.get_user_validation_data(user_id) + return await self.user_handler.get_user_validation_data( + user_id=user_id + ) # Vector handler methods async def upsert(self, entry: VectorEntry) -> None: @@ -1407,14 +1208,14 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: async def semantic_search( self, query_vector: list[float], search_settings: SearchSettings - ) -> list[VectorSearchResult]: + ) -> list[ChunkSearchResult]: return await self.vector_handler.semantic_search( query_vector, search_settings ) async def full_text_search( self, query_text: str, search_settings: SearchSettings - ) -> list[VectorSearchResult]: + ) -> list[ChunkSearchResult]: return await self.vector_handler.full_text_search( query_text, search_settings ) @@ -1426,7 +1227,7 @@ async def hybrid_search( search_settings: SearchSettings, *args, **kwargs, - ) -> list[VectorSearchResult]: + ) -> list[ChunkSearchResult]: return await self.vector_handler.hybrid_search( query_text, query_vector, search_settings, *args, **kwargs ) @@ -1436,7 +1237,7 @@ async def search_documents( query_text: str, settings: SearchSettings, query_embedding: Optional[list[float]] = None, - ) -> list[DocumentInfo]: + ) -> list[DocumentResponse]: return await self.document_handler.search_documents( query_text, query_embedding, settings ) @@ -1444,17 +1245,33 @@ async def 
search_documents( async def delete( self, filters: dict[str, Any] ) -> dict[str, dict[str, str]]: - return await self.vector_handler.delete(filters) + result = await self.vector_handler.delete(filters) + try: + await self.entity_handler.delete(parent_id=filters["id"]["$eq"]) + except Exception as e: + logger.debug(f"Attempt to delete entity failed: {e}") + try: + await self.relationship_handler.delete( + parent_id=filters["id"]["$eq"] + ) + except Exception as e: + logger.debug(f"Attempt to delete relationship failed: {e}") + return result async def assign_document_to_collection_vector( - self, document_id: UUID, collection_id: UUID + self, + document_id: UUID, + collection_id: UUID, ) -> None: return await self.vector_handler.assign_document_to_collection_vector( - document_id, collection_id + document_id=document_id, + collection_id=collection_id, ) async def remove_document_from_collection_vector( - self, document_id: UUID, collection_id: UUID + self, + document_id: UUID, + collection_id: UUID, ) -> None: return ( await self.vector_handler.remove_document_from_collection_vector( @@ -1470,28 +1287,29 @@ async def delete_collection_vector(self, collection_id: UUID) -> None: collection_id ) - async def get_document_chunks( + async def list_document_chunks( self, document_id: UUID, - offset: int = 0, - limit: int = -1, + offset: int, + limit: int, include_vectors: bool = False, ) -> dict[str, Any]: - return await self.vector_handler.get_document_chunks( - document_id, offset, limit, include_vectors + return await self.vector_handler.list_document_chunks( + document_id=document_id, + offset=offset, + limit=limit, + include_vectors=include_vectors, ) - async def get_chunk(self, extraction_id: UUID) -> Optional[dict[str, Any]]: - return await self.vector_handler.get_chunk(extraction_id) + async def get_chunk(self, chunk_id: UUID) -> dict: + return await self.vector_handler.get_chunk(chunk_id) async def create_index( self, table_name: Optional[VectorTableName] = None, index_measure: IndexMeasure = IndexMeasure.cosine_distance, index_method: IndexMethod = IndexMethod.auto, - index_arguments: Optional[ - Union[IndexArgsIVFFlat, IndexArgsHNSW] - ] = None, + index_arguments: Optional[IndexArgsIVFFlat | IndexArgsHNSW] = None, index_name: Optional[str] = None, index_column: Optional[str] = None, concurrently: bool = True, @@ -1507,9 +1325,9 @@ async def create_index( ) async def list_indices( - self, table_name: Optional[VectorTableName] = None - ) -> list[dict]: - return await self.vector_handler.list_indices(table_name) + self, offset: int, limit: int, filters: Optional[dict] = None + ) -> dict: + return await self.vector_handler.list_indices(offset, limit, filters) async def delete_index( self, @@ -1525,262 +1343,16 @@ async def get_semantic_neighbors( self, document_id: UUID, chunk_id: UUID, - limit: int = 10, + offset: int, + limit: int, similarity_threshold: float = 0.5, ) -> list[dict[str, Any]]: return await self.vector_handler.get_semantic_neighbors( - document_id, chunk_id, limit, similarity_threshold - ) - - async def add_kg_extractions( - self, - kg_extractions: list[KGExtraction], - table_prefix: str = "chunk_", - ) -> Tuple[int, int]: - """Forward to KG handler add_kg_extractions method.""" - return await self.kg_handler.add_kg_extractions( - kg_extractions, table_prefix - ) - - async def add_entities( - self, - entities: list[Entity], - table_name: str, - conflict_columns: list[str] = [], - ) -> Any: - """Forward to KG handler add_entities method.""" - return await 
self.kg_handler.add_entities( - entities, table_name, conflict_columns - ) - - async def add_triples( - self, - triples: list[Triple], - table_name: str = "chunk_triple", - ) -> None: - """Forward to KG handler add_triples method.""" - return await self.kg_handler.add_triples(triples, table_name) - - async def get_entity_map( - self, offset: int, limit: int, document_id: UUID - ) -> Dict[str, Dict[str, List[Dict[str, Any]]]]: - """Forward to KG handler get_entity_map method.""" - return await self.kg_handler.get_entity_map(offset, limit, document_id) - - async def upsert_embeddings( - self, - data: List[Tuple[Any]], - table_name: str, - ) -> None: - """Forward to KG handler upsert_embeddings method.""" - return await self.kg_handler.upsert_embeddings(data, table_name) - - # Community methods - async def add_community_info(self, communities: List[Any]) -> None: - """Forward to KG handler add_communities method.""" - return await self.kg_handler.add_community_info(communities) - - async def get_communities( - self, - collection_id: Optional[UUID] = None, - levels: Optional[list[int]] = None, - community_numbers: Optional[list[int]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict: - """Forward to KG handler get_communities method.""" - return await self.kg_handler.get_communities( - collection_id, - levels, - community_numbers, - offset, - limit, - ) - - async def add_community_report( - self, community_report: CommunityReport - ) -> None: - """Forward to KG handler add_community_report method.""" - return await self.kg_handler.add_community_report(community_report) - - async def get_community_details( - self, community_number: int, collection_id: UUID - ) -> Tuple[int, list[Entity], list[Triple]]: - """Forward to KG handler get_community_details method.""" - return await self.kg_handler.get_community_details( - community_number, collection_id - ) - - async def get_community_reports( - self, collection_id: UUID - ) -> List[CommunityReport]: - """Forward to KG handler get_community_reports method.""" - return await self.kg_handler.get_community_reports(collection_id) - - async def check_community_reports_exist( - self, collection_id: UUID, offset: int, limit: int - ) -> List[int]: - """Forward to KG handler check_community_reports_exist method.""" - return await self.kg_handler.check_community_reports_exist( - collection_id, offset, limit - ) - - async def perform_graph_clustering( - self, - collection_id: UUID, - leiden_params: Dict[str, Any], - ) -> int: - """Forward to KG handler perform_graph_clustering method.""" - return await self.kg_handler.perform_graph_clustering( - collection_id, leiden_params - ) - - # Graph operations - async def delete_graph_for_collection( - self, collection_id: UUID, cascade: bool = False - ) -> None: - """Forward to KG handler delete_graph_for_collection method.""" - return await self.kg_handler.delete_graph_for_collection( - collection_id, cascade - ) - - async def delete_node_via_document_id( - self, document_id: UUID, collection_id: UUID - ) -> None: - """Forward to KG handler delete_node_via_document_id method.""" - return await self.kg_handler.delete_node_via_document_id( - document_id, collection_id - ) - - # Entity and Triple operations - async def get_entities( - self, - collection_id: Optional[UUID], - entity_ids: Optional[List[str]] = None, - entity_names: Optional[List[str]] = None, - entity_table_name: str = "document_entity", - extra_columns: Optional[List[str]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict: - """Forward to 
KG handler get_entities method.""" - return await self.kg_handler.get_entities( - collection_id, - entity_ids, - entity_names, - entity_table_name, - extra_columns, - offset, - limit, - ) - - async def get_triples( - self, - collection_id: Optional[UUID] = None, - entity_names: Optional[List[str]] = None, - triple_ids: Optional[List[str]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict: - """Forward to KG handler get_triples method.""" - return await self.kg_handler.get_triples( - collection_id, - entity_names, - triple_ids, - offset, - limit, - ) - - async def get_entity_count( - self, - collection_id: Optional[UUID] = None, - document_id: Optional[UUID] = None, - distinct: bool = False, - entity_table_name: str = "document_entity", - ) -> int: - """Forward to KG handler get_entity_count method.""" - return await self.kg_handler.get_entity_count( - collection_id, document_id, distinct, entity_table_name - ) - - async def get_triple_count( - self, - collection_id: Optional[UUID] = None, - document_id: Optional[UUID] = None, - ) -> int: - """Forward to KG handler get_triple_count method.""" - return await self.kg_handler.get_triple_count( - collection_id, document_id - ) - - # Estimation methods - async def get_creation_estimate( - self, collection_id: UUID, kg_creation_settings: KGCreationSettings - ) -> KGCreationEstimationResponse: - """Forward to KG handler get_creation_estimate method.""" - return await self.kg_handler.get_creation_estimate( - collection_id, kg_creation_settings - ) - - async def get_enrichment_estimate( - self, collection_id: UUID, kg_enrichment_settings: KGEnrichmentSettings - ) -> KGEnrichmentEstimationResponse: - """Forward to KG handler get_enrichment_estimate method.""" - return await self.kg_handler.get_enrichment_estimate( - collection_id, kg_enrichment_settings - ) - - async def get_deduplication_estimate( - self, - collection_id: UUID, - kg_deduplication_settings: KGEntityDeduplicationSettings, - ) -> KGDeduplicationEstimationResponse: - """Forward to KG handler get_deduplication_estimate method.""" - return await self.kg_handler.get_deduplication_estimate( - collection_id, kg_deduplication_settings - ) - - async def get_all_triples( - self, collection_id: UUID, document_ids: Optional[list[UUID]] = None - ) -> List[Triple]: - return await self.kg_handler.get_all_triples( - collection_id, document_ids - ) - - async def update_entity_descriptions(self, entities: list[Entity]): - return await self.kg_handler.update_entity_descriptions(entities) - - async def vector_query( - self, query: str, **kwargs: Any - ) -> AsyncGenerator[Any, None]: - return self.kg_handler.vector_query(query, **kwargs) # type: ignore - - async def create_vector_index(self) -> None: - return await self.kg_handler.create_vector_index() - - async def delete_triples(self, triple_ids: list[int]) -> None: - return await self.kg_handler.delete_triples(triple_ids) - - async def get_schema(self) -> Any: - return await self.kg_handler.get_schema() - - async def structured_query(self) -> Any: - return await self.kg_handler.structured_query() - - async def update_extraction_prompt(self) -> None: - return await self.kg_handler.update_extraction_prompt() - - async def update_kg_search_prompt(self) -> None: - return await self.kg_handler.update_kg_search_prompt() - - async def upsert_triples(self) -> None: - return await self.kg_handler.upsert_triples() - - async def get_existing_entity_extraction_ids( - self, document_id: UUID - ) -> list[str]: - return await 
self.kg_handler.get_existing_entity_extraction_ids( - document_id + offset=offset, + limit=limit, + document_id=document_id, + chunk_id=chunk_id, + similarity_threshold=similarity_threshold, ) async def add_prompt( @@ -1790,6 +1362,16 @@ async def add_prompt( name, template, input_types ) + async def get_cached_prompt( + self, + prompt_name: str, + inputs: Optional[dict[str, Any]] = None, + prompt_override: Optional[str] = None, + ) -> str: + return await self.prompt_handler.get_cached_prompt( + prompt_name, inputs, prompt_override + ) + async def get_prompt( self, prompt_name: str, @@ -1849,13 +1431,16 @@ async def delete_file(self, document_id: UUID) -> bool: async def get_files_overview( self, + offset: int, + limit: int, filter_document_ids: Optional[list[UUID]] = None, filter_file_names: Optional[list[str]] = None, - offset: int = 0, - limit: int = 100, ) -> list[dict]: return await self.file_handler.get_files_overview( - filter_document_ids, filter_file_names, offset, limit + offset=offset, + limit=limit, + filter_document_ids=filter_document_ids, + filter_file_names=filter_file_names, ) async def log( @@ -1878,11 +1463,11 @@ async def info_log( async def get_info_logs( self, - offset: int = 0, - limit: int = 100, + offset: int, + limit: int, run_type_filter: Optional[RunType] = None, - user_ids: Optional[List[UUID]] = None, - ) -> List[RunInfoLog]: + user_ids: Optional[list[UUID]] = None, + ) -> list[RunInfoLog]: """Retrieve log info entries with filtering and pagination.""" return await self.logging_handler.get_info_logs( offset, limit, run_type_filter, user_ids @@ -1890,29 +1475,31 @@ async def get_info_logs( async def get_logs( self, - run_ids: List[UUID], + run_ids: list[UUID], limit_per_run: int = 10, - ) -> List[Dict[str, Any]]: + ) -> list[dict[str, Any]]: """Retrieve logs for specified run IDs with a per-run limit.""" return await self.logging_handler.get_logs(run_ids, limit_per_run) - async def create_conversation(self) -> str: - """Create a new conversation and return its ID.""" + async def create_conversation(self) -> dict: + """Create a new conversation and return its ID and timestamp.""" return await self.logging_handler.create_conversation() async def delete_conversation(self, conversation_id: str) -> None: """Delete a conversation and all associated data.""" return await self.logging_handler.delete_conversation(conversation_id) - async def get_conversations_overview( + async def get_conversations( self, - conversation_ids: Optional[List[UUID]] = None, - offset: int = 0, - limit: int = -1, - ) -> Dict[str, Union[List[Dict], int]]: + offset: int, + limit: int, + conversation_ids: Optional[list[UUID]] = None, + ) -> dict[str, list[dict] | int]: """Get an overview of conversations with pagination.""" - return await self.logging_handler.get_conversations_overview( - conversation_ids, offset, limit + return await self.logging_handler.get_conversations( + offset=offset, + limit=limit, + conversation_ids=conversation_ids, ) async def add_message( @@ -1920,7 +1507,7 @@ async def add_message( conversation_id: str, content: Message, parent_id: Optional[str] = None, - metadata: Optional[Dict] = None, + metadata: Optional[dict] = None, ) -> str: """Add a message to a conversation.""" return await self.logging_handler.add_message( @@ -1935,17 +1522,15 @@ async def edit_message( async def get_conversation( self, conversation_id: str, branch_id: Optional[str] = None - ) -> List[Tuple[str, Message]]: + ) -> list[Tuple[str, Message]]: """Retrieve all messages in a conversation 
branch.""" return await self.logging_handler.get_conversation( conversation_id, branch_id ) - async def get_branches_overview(self, conversation_id: str) -> List[Dict]: + async def get_branches(self, conversation_id: str) -> list[dict]: """Get an overview of all branches in a conversation.""" - return await self.logging_handler.get_branches_overview( - conversation_id - ) + return await self.logging_handler.get_branches(conversation_id) async def get_next_branch(self, current_branch_id: str) -> Optional[str]: """Get the ID of the next branch in chronological order.""" @@ -1958,3 +1543,14 @@ async def get_prev_branch(self, current_branch_id: str) -> Optional[str]: async def branch_at_message(self, message_id: str) -> str: """Create a new branch starting at a specific message.""" return await self.logging_handler.branch_at_message(message_id) + + async def list_chunks( + self, + offset: int, + limit: int, + filters: Optional[dict[str, Any]] = None, + include_vectors: bool = False, + ) -> dict[str, Any]: + return await self.vector_handler.list_chunks( + offset, limit, filters, include_vectors + ) diff --git a/py/core/base/providers/email.py b/py/core/base/providers/email.py index 69c542918..9590c666c 100644 --- a/py/core/base/providers/email.py +++ b/py/core/base/providers/email.py @@ -1,8 +1,8 @@ # email_provider.py import logging +import os from abc import ABC, abstractmethod from typing import Optional -import os from .base import Provider, ProviderConfig diff --git a/py/core/base/providers/embedding.py b/py/core/base/providers/embedding.py index 57eee761c..60d048115 100644 --- a/py/core/base/providers/embedding.py +++ b/py/core/base/providers/embedding.py @@ -11,8 +11,8 @@ from core.base.abstractions import VectorQuantizationSettings from ..abstractions import ( + ChunkSearchResult, EmbeddingPurpose, - VectorSearchResult, default_embedding_prefixes, ) from .base import Provider, ProviderConfig @@ -168,7 +168,7 @@ def get_embeddings( def rerank( self, query: str, - results: list[VectorSearchResult], + results: list[ChunkSearchResult], stage: PipeStage = PipeStage.RERANK, limit: int = 10, ): @@ -178,7 +178,7 @@ def rerank( async def arerank( self, query: str, - results: list[VectorSearchResult], + results: list[ChunkSearchResult], stage: PipeStage = PipeStage.RERANK, limit: int = 10, ): diff --git a/py/core/base/providers/ingestion.py b/py/core/base/providers/ingestion.py index 7b6df0820..2d6d9947b 100644 --- a/py/core/base/providers/ingestion.py +++ b/py/core/base/providers/ingestion.py @@ -23,10 +23,10 @@ class IngestionConfig(ProviderConfig): audio_transcription_model: str = "openai/whisper-1" vision_img_prompt_name: str = "vision_img" - vision_img_model: str = "openai/gpt-4-mini" + vision_img_model: str = "openai/gpt-4o" vision_pdf_prompt_name: str = "vision_pdf" - vision_pdf_model: str = "openai/gpt-4-mini" + vision_pdf_model: str = "openai/gpt-4o" skip_document_summary: bool = False document_summary_system_prompt: str = "default_system" diff --git a/py/core/base/utils/__init__.py b/py/core/base/utils/__init__.py index 11e329da7..26928cd0f 100644 --- a/py/core/base/utils/__init__.py +++ b/py/core/base/utils/__init__.py @@ -2,17 +2,15 @@ RecursiveCharacterTextSplitter, TextSplitter, _decorate_vector_type, + _get_str_estimation_output, decrement_version, - format_entity_types, - format_relations, format_search_results_for_llm, format_search_results_for_stream, - generate_collection_id_from_name, generate_default_prompt_id, generate_default_user_collection_id, generate_document_id, 
generate_extraction_id, - generate_run_id, + generate_id, generate_user_id, increment_version, llm_cost_per_million_tokens, @@ -22,11 +20,9 @@ ) __all__ = [ - "format_entity_types", - "format_relations", "format_search_results_for_stream", "format_search_results_for_llm", - "generate_run_id", + "generate_id", "generate_default_user_collection_id", "increment_version", "decrement_version", @@ -35,11 +31,11 @@ "generate_document_id", "generate_extraction_id", "generate_user_id", - "generate_collection_id_from_name", "generate_default_prompt_id", "RecursiveCharacterTextSplitter", "TextSplitter", "llm_cost_per_million_tokens", "validate_uuid", "_decorate_vector_type", + "_get_str_estimation_output", ] diff --git a/py/core/configs/full_azure.toml b/py/core/configs/full_azure.toml index f12b3e982..5bfa6547c 100644 --- a/py/core/configs/full_azure.toml +++ b/py/core/configs/full_azure.toml @@ -10,21 +10,21 @@ # KG settings batch_size = 256 - [database.kg_creation_settings] + [database.graph_creation_settings] generation_config = { model = "azure/gpt-4o-mini" } - [database.kg_entity_deduplication_settings] + [database.graph_entity_deduplication_settings] generation_config = { model = "azure/gpt-4o-mini" } - [database.kg_enrichment_settings] + [database.graph_enrichment_settings] generation_config = { model = "azure/gpt-4o-mini" } - [database.kg_search_settings] + [database.graph_search_settings] generation_config = { model = "azure/gpt-4o-mini" } [embedding] provider = "litellm" -base_model = "openai/text-embedding-3-small" # continue with `openai` for embeddings, due to server rate limit on azure +base_model = "azure/text-embedding-3-small" [file] provider = "postgres" @@ -37,6 +37,10 @@ new_after_n_chars = 512 max_characters = 1_024 combine_under_n_chars = 128 overlap = 256 +document_summary_model = "azure/gpt-4o-mini" +vision_img_model = "azure/gpt-4o" +vision_pdf_model = "azure/gpt-4o" + [ingestion.extra_parsers] pdf = "zerox" diff --git a/py/core/configs/full_local_llm.toml b/py/core/configs/full_local_llm.toml index 615805494..e85444744 100644 --- a/py/core/configs/full_local_llm.toml +++ b/py/core/configs/full_local_llm.toml @@ -21,32 +21,28 @@ concurrent_request_limit = 1 [database] provider = "postgres" - [database.kg_creation_settings] - kg_entity_description_prompt = "graphrag_entity_description" - kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" + [database.graph_creation_settings] + graph_entity_description_prompt = "graphrag_entity_description" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted fragment_merge_count = 4 # number of fragments to merge into a single extraction - max_knowledge_triples = 100 + max_knowledge_relationships = 100 max_description_input_length = 65536 - generation_config = { model = "ollama/llama3.1" } # and other params, model used for triplet extraction + generation_config = { model = "ollama/llama3.1" } # and other params, model used for relationshipt extraction - [database.kg_entity_deduplication_settings] - kg_entity_deduplication_type = "by_name" - kg_entity_deduplication_prompt = "graphrag_entity_deduplication" + [database.graph_entity_deduplication_settings] + graph_entity_deduplication_type = "by_name" + graph_entity_deduplication_prompt = "graphrag_entity_deduplication" max_description_input_length = 65536 generation_config = { model = "ollama/llama3.1" } # and other params, model used for deduplication - [database.kg_enrichment_settings] + 
[database.graph_enrichment_settings] community_reports_prompt = "graphrag_community_reports" max_summary_input_length = 65536 generation_config = { model = "ollama/llama3.1" } # and other params, model used for node description and graph clustering leiden_params = {} - [database.kg_search_settings] - entities_level = "document" # set to collection if you've run deduplication - map_system_prompt = "graphrag_map_system" - reduce_system_prompt = "graphrag_reduce_system" + [database.graph_search_settings] generation_config = { model = "ollama/llama3.1" } diff --git a/py/core/configs/local_llm.toml b/py/core/configs/local_llm.toml index 2c51e5e79..9e2c52d3f 100644 --- a/py/core/configs/local_llm.toml +++ b/py/core/configs/local_llm.toml @@ -28,32 +28,28 @@ concurrent_request_limit = 2 [database] provider = "postgres" - [database.kg_creation_settings] - kg_entity_description_prompt = "graphrag_entity_description" - kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" + [database.graph_creation_settings] + graph_entity_description_prompt = "graphrag_entity_description" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted fragment_merge_count = 4 # number of fragments to merge into a single extraction - max_knowledge_triples = 100 + max_knowledge_relationships = 100 max_description_input_length = 65536 - generation_config = { model = "ollama/llama3.1" } # and other params, model used for triplet extraction + generation_config = { model = "ollama/llama3.1" } # and other params, model used for relationshipt extraction - [database.kg_entity_deduplication_settings] - kg_entity_deduplication_type = "by_name" - kg_entity_deduplication_prompt = "graphrag_entity_deduplication" + [database.graph_entity_deduplication_settings] + graph_entity_deduplication_type = "by_name" + graph_entity_deduplication_prompt = "graphrag_entity_deduplication" max_description_input_length = 65536 generation_config = { model = "ollama/llama3.1" } # and other params, model used for deduplication - [database.kg_enrichment_settings] + [database.graph_enrichment_settings] community_reports_prompt = "graphrag_community_reports" max_summary_input_length = 65536 generation_config = { model = "ollama/llama3.1" } # and other params, model used for node description and graph clustering leiden_params = {} - [database.kg_search_settings] - entities_level = "document" # set to collection if you've run deduplication - map_system_prompt = "graphrag_map_system" - reduce_system_prompt = "graphrag_reduce_system" + [database.graph_search_settings] generation_config = { model = "ollama/llama3.1" } diff --git a/py/core/configs/r2r_azure.toml b/py/core/configs/r2r_azure.toml index 600920e80..5d91cd5e7 100644 --- a/py/core/configs/r2r_azure.toml +++ b/py/core/configs/r2r_azure.toml @@ -10,16 +10,16 @@ # KG settings batch_size = 256 - [database.kg_creation_settings] + [database.graph_creation_settings] generation_config = { model = "azure/gpt-4o-mini" } - [database.kg_entity_deduplication_settings] + [database.graph_entity_deduplication_settings] generation_config = { model = "azure/gpt-4o-mini" } - [database.kg_enrichment_settings] + [database.graph_enrichment_settings] generation_config = { model = "azure/gpt-4o-mini" } - [database.kg_search_settings] + [database.graph_search_settings] generation_config = { model = "azure/gpt-4o-mini" } [embedding] @@ -37,8 +37,9 @@ chunk_overlap = 512 excluded_parsers = ["mp4"] audio_transcription_model="azure/whisper-1" 
-vision_img_model = "azure/gpt-4o-mini" -vision_pdf_model = "azure/gpt-4o-mini" +document_summary_model = "azure/gpt-4o-mini" +vision_img_model = "azure/gpt-4o" +vision_pdf_model = "azure/gpt-4o" document_summary_model = "azure/gpt-4o-mini" [ingestion.chunk_enrichment_settings] diff --git a/py/core/examples/hello_r2r.py b/py/core/examples/hello_r2r.py index 979f80333..98c77f547 100644 --- a/py/core/examples/hello_r2r.py +++ b/py/core/examples/hello_r2r.py @@ -18,6 +18,6 @@ # RAG Results: # Search Results: -# AggregateSearchResult(vector_search_results=[VectorSearchResult(id=2d71e689-0a0e-5491-a50b-4ecb9494c832, score=0.6848798582029441, metadata={'text': 'John is a person that works at Google.', 'version': 'v0', 'chunk_order': 0, 'document_id': 'ed76b6ee-dd80-5172-9263-919d493b439a', 'id': '1ba494d7-cb2f-5f0e-9f64-76c31da11381', 'associatedQuery': 'Who is john'})], kg_search_results=None) +# AggregateSearchResult(chunk_search_results=[ChunkSearchResult(id=2d71e689-0a0e-5491-a50b-4ecb9494c832, score=0.6848798582029441, metadata={'text': 'John is a person that works at Google.', 'version': 'v0', 'chunk_order': 0, 'document_id': 'ed76b6ee-dd80-5172-9263-919d493b439a', 'id': '1ba494d7-cb2f-5f0e-9f64-76c31da11381', 'associatedQuery': 'Who is john'})], graph_search_results=None) # Completion: # ChatCompletion(id='chatcmpl-9g0HnjGjyWDLADe7E2EvLWa35cMkB', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='John is a person that works at Google [1].', role='assistant', function_call=None, tool_calls=None))], created=1719797903, model='gpt-4o-mini', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=11, prompt_tokens=145, total_tokens=156)) diff --git a/py/core/examples/scripts/advanced_kg_cookbook.py b/py/core/examples/scripts/advanced_kg_cookbook.py index 4084642fc..586888dfe 100644 --- a/py/core/examples/scripts/advanced_kg_cookbook.py +++ b/py/core/examples/scripts/advanced_kg_cookbook.py @@ -126,7 +126,7 @@ def main( client = R2RClient(base_url=base_url) - prompt = "graphrag_triples_extraction_few_shot" + prompt = "graphrag_relationships_extraction_few_shot" client.update_prompt( prompt, diff --git a/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_chunks.py b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_chunks.py new file mode 100644 index 000000000..ec097083e --- /dev/null +++ b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_chunks.py @@ -0,0 +1,114 @@ +import random +import string + +from r2r import R2RClient + +first_created_chunk_id = "abcc4dd6-5f28-596e-a55b-7cf242ca30aa" +second_created_chunk_id = "abcc4dd6-5f28-596e-a55b-7cf242ca30bb" +created_document_id = "defc4dd6-5f28-596e-a55b-7cf242ca30aa" + + +# Function to generate a random email +def generate_random_email(): + username_length = 8 + username = "".join( + random.choices( + string.ascii_lowercase + string.digits, k=username_length + ) + ) + domain = random.choice( + ["example.com", "test.com", "fake.org", "random.net"] + ) + return f"{username}@{domain}" + + +user_email = generate_random_email() + +client = R2RClient("http://localhost:7276", prefix="/v3") + +# First create and authenticate a user if not already done +try: + new_user = client.users.register( + email=user_email, password="new_secure_password123" + ) + print("New user created:", new_user) +except Exception as e: + print("User might already exist:", str(e)) + +# Login +result = client.users.login( + email=user_email, password="new_secure_password123" +) 
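# --- Editor's aside: a minimal pagination sketch (not part of the original script) ---
# The handler signatures elsewhere in this diff now take explicit `offset` and `limit`
# arguments, and `client.chunks.list` is invoked the same way just below. This
# hypothetical helper shows one way to page through every chunk with that API; the
# response shape (a "results" list inside a dict) is an assumption inferred from the
# other v3 scripts in this change, not a confirmed contract.
def iter_all_chunks(client, page_size=100):
    offset = 0
    while True:
        page = client.chunks.list(offset=offset, limit=page_size)
        results = page.get("results", []) if isinstance(page, dict) else []
        if not results:
            break
        yield from results
        offset += page_size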
+print("Login successful") + +# Test 1: List chunks +print("\n=== Test 1: List Chunks ===") +list_result = client.chunks.list( + offset=0, + limit=10, + metadata_filter={"key": "value"}, + include_vectors=False, +) +print("Chunks list:", list_result) + +# Test 2: Create chunk and document +print("\n=== Test 2: Create Chunk & Doc. ===") +create_result = client.chunks.create( + chunks=[ + { + "id": first_created_chunk_id, + "document_id": created_document_id, + "collection_ids": ["b4ac4dd6-5f27-596e-a55b-7cf242ca30aa"], + "metadata": {"key": "value"}, + "text": "Some text content", + } + ], + run_with_orchestration=False, +) +print("Created chunk:", create_result) + +# TODO - Update router and uncomment this test +# TODO - Update router and uncomment this test +# Test 3: Create chunk +# print("\n=== Test 3: Create Chunk & Doc. ===") +# create_result = client.chunks.create( +# chunks=[ +# { +# "id": second_created_chunk_id, +# "document_id": created_document_id, +# "collection_ids": ["b4ac4dd6-5f27-596e-a55b-7cf242ca30aa"], +# "metadata": {"key": "value"}, +# "text": "Some text content", +# } +# ], +# run_with_orchestration=False, +# ) +# print("Created chunk:", create_result) + + +# Test 3: Search chunks +print("\n=== Test 3: Search Chunks ===") +search_result = client.chunks.search(query="whoami?") +print("Search results:", search_result) + +# Test 4: Retrieve chunk +print("\n=== Test 4: Retrieve Chunk ===") +retrieve_result = client.chunks.retrieve(id=first_created_chunk_id) +print("Retrieved chunk:", retrieve_result) + +# Test 5: Update chunk +print("\n=== Test 5: Update Chunk ===") +update_result = client.chunks.update( + { + "id": first_created_chunk_id, + "text": "Updated content", + "metadata": {"key": "new value"}, + } +) +print("Updated chunk:", update_result) + + +# Test 4: Retrieve chunk +print("\n=== Test 6: Retrieve Updated Chunk ===") +retrieve_result = client.chunks.retrieve(id=first_created_chunk_id) +print("Retrieved updated chunk:", retrieve_result) diff --git a/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_collections.py b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_collections.py new file mode 100644 index 000000000..296de8f16 --- /dev/null +++ b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_collections.py @@ -0,0 +1,95 @@ +from r2r import R2RClient + +user_email = "John.Doe1@email.com" + +client = R2RClient("http://localhost:7276", prefix="/v3") + +# First create and authenticate a user if not already done +try: + new_user = client.users.register( + email=user_email, password="new_secure_password123" + ) + print("New user created:", new_user) +except Exception as e: + print("User might already exist:", str(e)) + +# Login to get necessary tokens +result = client.users.login( + email=user_email, password="new_secure_password123" +) +print("Login successful") + +# # Test 1: Create a new collection +print("\n=== Test 1: Create Collection ===") +create_result = client.collections.create( + name="Test Collection", + description="A test collection for integration testing", +) +print("Created collection:", create_result) +collection_id = create_result["results"]["collection_id"] + +# Test 2: List collections +# print("\n=== Test 2: List Collections ===") +list_result = client.collections.list(offset=0, limit=10) +print("Collections list:", list_result) + +# Test 3: Get specific collection +print("\n=== Test 3: Get Collection Details ===") +get_result = client.collections.retrieve( + id="3642055e-07aa-5741-986e-6d1b47a8a79c" +) +print("Collection details:", get_result) 
+ +# Test 4: Update collection +print("\n=== Test 4: Update Collection ===") +update_result = client.collections.update( + id=collection_id, + name="Updated Test Collection", + description="Updated description for testing", +) +print("Updated collection:", update_result) + +# # Test 5: Add document to collection +# list user documents +documents = client.documents.list(limit=10, offset=0) +print(documents) + +print("\n=== Test 5: Add Document to Collection ===") +add_doc_result = client.collections.add_document( + id=collection_id, document_id=documents["results"][0]["id"] +) +print("Added document to collection:", add_doc_result) + +# Test 6: List documents in collection +print("\n=== Test 6: List Collection Documents ===") +docs_result = client.collections.list_documents( + id=collection_id, offset=0, limit=10 +) +print("Collection documents:", docs_result) + +# Test 7: Get collection users +print("\n=== Test 7: List Collection Users ===") +users_result = client.collections.list_users( + id=collection_id, offset=0, limit=10 +) +print("Collection users:", users_result) + +# Test 8: Remove document from collection +print("\n=== Test 8: Remove Document from Collection ===") +remove_doc_result = client.collections.remove_document( + id=collection_id, document_id=documents["results"][0]["id"] +) +print("Removed document from collection:", remove_doc_result) + +# Test 9: Delete collection +print("\n=== Test 9: Delete Collection ===") +delete_result = client.collections.delete(id=collection_id) +print("Deleted collection:", delete_result) + +# # Verify deletion by trying to get the collection (should fail) +# print("\n=== Test 10: Verify Deletion ===") +# try: +# client.collections.retrieve(id=collection_id) +# print("ERROR: Collection still exists!") +# except Exception as e: +# print("Successfully verified collection deletion:", str(e)) diff --git a/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_conversations.py b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_conversations.py new file mode 100644 index 000000000..1eaf8ee13 --- /dev/null +++ b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_conversations.py @@ -0,0 +1,87 @@ +import asyncio +import json + +from r2r import R2RClient + +user_email = "John.Doe1@email.com" +client = R2RClient("http://localhost:7276", prefix="/v3") + +# First create and authenticate a user if not already done +try: + new_user = client.users.register( + email=user_email, password="new_secure_password123" + ) + print("New user created:", new_user) +except Exception as e: + print("User might already exist:", str(e)) + +# Login to get necessary tokens +result = client.users.login( + email=user_email, password="new_secure_password123" +) +print("Login successful") + +# Test 1: Create a new conversation +print("\n=== Test 1: Create Conversation ===") +create_result = client.conversations.create() +print("Created conversation:", create_result) +conversation_id = create_result["results"]["conversation_id"] + +# Test 2: List conversations +print("\n=== Test 2: List Conversations ===") +list_result = client.conversations.list(offset=0, limit=10) +print("Conversations list:", list_result) + +# Test 3: Get specific conversation +print("\n=== Test 3: Get Conversation Details ===") +get_result = client.conversations.retrieve(id=conversation_id) +print("Conversation details:", get_result) + +# Test 4: Add message to conversation +print("\n=== Test 4: Add Message to Conversation ===") +add_message_result = client.conversations.add_message( + id=conversation_id, + 
content="Hello, this is a test message!", + role="user", + metadata={"test_key": "test_value"}, +) +print("Added message to conversation:", add_message_result) +message_id = add_message_result["results"]["message_id"] + +# # Test 5: Update message in conversation +print("\n=== Test 5: Update Message in Conversation ===") +update_message_result = client.conversations.update_message( + id=conversation_id, + message_id=message_id, + content="Updated test message content", +) +print("Updated message in conversation:", update_message_result) + +# # Test 6: List branches in conversation +print("\n=== Test 6: List Branches in Conversation ===") +branches_result = client.conversations.list_branches(id=conversation_id) +print("Conversation branches:", branches_result) + +# Verify deletion by trying to get the conversation (should fail) +print("\n=== Test 7: Retrieve Conversation ===") +result = client.conversations.retrieve(id=conversation_id) +print("Retrieved conversation:", result) + + +# # Test 7: Delete conversation +print("\n=== Test 7.5: Delete Conversation ===") +delete_result = client.conversations.delete(id=conversation_id) +print("Deleted conversation:", delete_result) + +# Verify deletion by trying to get the conversation (should fail) +print("\n=== Test 8: Verify Deletion ===") +try: + result = client.conversations.retrieve(id=conversation_id) + if result["results"] != []: + print("ERROR: Conversation still exists!") +except Exception as e: + print("Successfully verified conversation deletion:", str(e)) + +# Run the async test function +# async def test_conversations(): +# asyncio.run(test_conversations()) diff --git a/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_documents.py b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_documents.py new file mode 100644 index 000000000..85c91baec --- /dev/null +++ b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_documents.py @@ -0,0 +1,67 @@ +from r2r import R2RClient + +first_ingested_document_id = "1b594aea-583a-5a4b-92f4-229d6e5eb886" +first_ingested_file_path = "../../data/pg_essay_1.html" +user_email = "John.Doe1@email.com" + +client = R2RClient("http://localhost:7276", prefix="/v3") + +# First create and authenticate a user if not already done +try: + new_user = client.users.register( + email=user_email, password="new_secure_password123" + ) + print("New user created:", new_user) +except Exception as e: + print("User might already exist:", str(e)) + +# Login to get necessary tokens +result = client.users.login( + email=user_email, password="new_secure_password123" +) +print("Login successful") + +# Test 1: List documents +print("\n=== Test 1: List Documents ===") +list_result = client.documents.list(limit=10, offset=0) +print("Documents list:", list_result) + +# Test 2: Create document +print("\n=== Test 2: Create Document ===") +create_result = client.documents.create( + file_path=first_ingested_file_path, + metadata={"metadata_1": "some random metadata"}, + run_with_orchestration=False, + id=None, +) +print("Created document:", create_result) + +# Test 3: Retrieve document +print("\n=== Test 3: Retrieve Document ===") +retrieve_result = client.documents.retrieve(id=first_ingested_document_id) +print("Retrieved document:", retrieve_result) + +# Test 4: Update document +print("\n=== Test 4: Update Document ===") +update_result = client.documents.update( + file_path=first_ingested_file_path, id=first_ingested_document_id +) +print("Updated document:", update_result) + +# Test 5: List document chunks +print("\n=== Test 5: List Document 
Chunks ===") +chunks_result = client.documents.list_chunks(id=first_ingested_document_id) +print("Document chunks:", chunks_result) + +# Test 6: List document collections +client.users.logout() +print("\n=== Test 6: List Document Collections ===") +collections_result = client.documents.list_collections( + id=first_ingested_document_id, offset=0, limit=10 +) +print("Document collections:", collections_result) + +# Test 7: Delete document (commented out for safety) +# print("\n=== Test 7: Delete Document ===") +# delete_result = client.documents.delete(id=first_ingested_document_id) +# print("Deleted document:", delete_result) diff --git a/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_graph.py b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_graph.py new file mode 100644 index 000000000..328c77b47 --- /dev/null +++ b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_graph.py @@ -0,0 +1,207 @@ +import time +import uuid + +from r2r import R2RClient + +# Initialize client +client = R2RClient("http://localhost:7276", prefix="/v3") + + +def setup_prerequisites(): + """Setup necessary document and collection""" + print("\n=== Setting up prerequisites ===") + + # # Login + # try: + # client.users.register(email=user_email, password="new_secure_password123") + # except Exception as e: + # print("User might already exist:", str(e)) + + # result = client.users.login(email=user_email, password="new_secure_password123") + # print("Login successful") + + try: + # Create document + doc_result = client.documents.create( + file_path="../../data/pg_essay_1.html", + metadata={"source": "test"}, + run_with_orchestration=False, + ) + print("doc_id = ", doc_result) + doc_id = doc_result["results"]["document_id"] + print(f"Created document with ID: {doc_id}") + except Exception as e: + doc_id = "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + pass + + # Create collection + # collection_id = str(uuid.uuid4()) + collection_result = client.collections.create( + # collection_id=collection_id, + name="Test Collection", + description="Collection for testing graph operations", + ) + print( + "Created collection with ID: " + + str(collection_result["results"]["collection_id"]) + ) + collection_id = collection_result["results"]["collection_id"] + # Add document to collection + client.collections.add_document(id=collection_id, document_id=doc_id) + print(f"Added document {doc_id} to collection {collection_id}") + + return collection_id, doc_id + + +def test_graph_operations(collection_id): + """Test graph CRUD operations""" + print("\n=== Testing Graph Operations ===") + + # Test 1: Create Graph + print("\n--- Test 1: Create Graph ---") + create_result = client.graphs.create( + collection_id=collection_id, + settings={ + "entity_types": ["PERSON", "ORG", "GPE"], + "min_confidence": 0.8, + }, + run_type="estimate", + run_with_orchestration=False, + ) + print("Graph estimation result:", create_result) + + create_result = client.graphs.create( + collection_id=collection_id, + settings={ + "entity_types": ["PERSON", "ORG", "GPE"], + "min_confidence": 0.8, + }, + run_type="run", + run_with_orchestration=False, + ) + print("Graph creation result:", create_result) + + # # # Test 2: Get Graph Status + # # print("\n--- Test 2: Get Graph Status ---") + # # status_result = client.graphs.get_status(collection_id=collection_id) + # # print("Graph status:", status_result) + + # Test 3: List Entities + print("\n--- Test 3: List Entities ---") + entities_result = client.graphs.list_entities( + collection_id=collection_id, + # 
level="collection", + offset=0, + limit=10, + ) + print("Entities:", entities_result) + + # Test 4: Get Specific Entity + print( + 'entities_result["results"]["entities"][0] = ', + entities_result["results"]["entities"][0], + ) + entity_id = entities_result["results"]["entities"][0][ + "id" + ] # entities_result['items'][0]['id'] + print("entity_id = ", entity_id) + print(f"\n--- Test 4: Get Entity {entity_id} ---") + entity_result = client.graphs.get_entity( + collection_id=collection_id, entity_id=entity_id + ) + print("Entity details:", entity_result) + + # # # # Test 5: List Relationships + # # # print("\n--- Test 5: List Relationships ---") + # # relationships_result = client.graphs.list_relationships( + # # collection_id=collection_id, + # # offset=0, + # # limit=10 + # # ) + # # print("Relationships:", relationships_result) + + # Test 6: Create Communities + print("\n--- Test 6: Create Communities ---") + communities_result = client.graphs.create_communities( + run_type="estimate", + collection_id=collection_id, + run_with_orchestration=False, + # settings={ + # "algorithm": "louvain", + # "resolution": 1.0, + # "min_community_size": 3 + # } + ) + print("Communities estimation result:", communities_result) + + communities_result = client.graphs.create_communities( + run_type="run", + collection_id=collection_id, + run_with_orchestration=False, + # settings={ + # "algorithm": "louvain", + # "resolution": 1.0, + # "min_community_size": 3 + # } + ) + print("Communities creation result:", communities_result) + + # Wait for community creation to complete + + # Test 7: List Communities + print("\n--- Test 7: List Communities ---") + communities_list = client.graphs.list_communities( + collection_id=collection_id, offset=0, limit=10 + ) + print("Communities:", communities_list) + + # Test 8: Tune Prompt + print("\n--- Test 8: Tune Prompt ---") + tune_result = client.graphs.tune_prompt( + collection_id=collection_id, + prompt_name="graphrag_relationships_extraction_few_shot", + documents_limit=100, + chunks_limit=1000, + ) + print("Prompt tuning result:", tune_result) + + # Test 9: Entity Deduplication + print("\n--- Test 9: Entity Deduplication ---") + dedup_result = client.graphs.deduplicate_entities( + collection_id=collection_id, + settings={ + "graph_entity_deduplication_type": "by_name", + "max_description_input_length": 65536, + }, + ) + print("Deduplication result:", dedup_result) + + # Optional: Clean up + # Test 10: Delete Graph + print("\n--- Test 10: Delete Graph ---") + delete_result = client.graphs.delete( + collection_id=collection_id, cascade=True + ) + print("Graph deletion result:", delete_result) + + +def main(): + try: + # Setup prerequisites + # collection_id, doc_id = setup_prerequisites() + collection_id = "42e0efa8-ab92-49e8-ae5b-84215876a632" + + # Run graph operations tests + test_graph_operations(collection_id) + + except Exception as e: + print(f"Error occurred: {str(e)}") + finally: + pass + # Cleanup: Logout + # client.users.logout() + # print("\nLogged out successfully") + + +if __name__ == "__main__": + main() diff --git a/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_indices.py b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_indices.py new file mode 100644 index 000000000..d83d1b7ca --- /dev/null +++ b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_indices.py @@ -0,0 +1,42 @@ +from r2r import R2RClient + +first_index_name = "index_1" +user_email = "John.Doe1@email.com" + +client = R2RClient("http://localhost:7276", prefix="/v3") + +# Login +result = 
client.users.login( + email=user_email, password="new_secure_password123" +) +print("Login successful") + +# Test 1: Create index +print("\n=== Test 1: Create Index ===") +create_result = client.indices.create_index( + config={ + "index_name": first_index_name, + "vector_size": 768, + "index_type": "hnsw", + "distance_metric": "cosine", + "max_elements": 1000000, + "recreate": True, + }, + run_with_orchestration=False, +) +print("Created index:", create_result) + +# Test 2: List indices +print("\n=== Test 2: List Indices ===") +list_result = client.indices.list_indices(limit=10, offset=0) +print("Indices list:", list_result) + +# Test 3: Get specific index +print("\n=== Test 3: Get Index ===") +get_result = client.indices.get_index(first_index_name) +print("Index details:", get_result) + +# Test 4: Delete index +print("\n=== Test 4: Delete Index ===") +delete_result = client.indices.delete_index(index_name=first_index_name) +print("Deleted index:", delete_result) diff --git a/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_prompts.py b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_prompts.py new file mode 100644 index 000000000..d56b0a963 --- /dev/null +++ b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_prompts.py @@ -0,0 +1,73 @@ +import asyncio + +from r2r import R2RClient + +user_email = "John.Doe1@email.com" +client = R2RClient("http://localhost:7276", prefix="/v3") + +# # First create and authenticate a user if not already done +# try: +# new_user = client.users.register( +# email=user_email, password="new_secure_password123" +# ) +# print("New user created:", new_user) +# except Exception as e: +# print("User might already exist:", str(e)) + +# # Login to get necessary tokens +# result = client.users.login( +# email=user_email, password="new_secure_password123" +# ) +# print("Login successful") + +# Test 1: Create a new prompt +print("\n=== Test 1: Create Prompt ===") +create_result = client.prompts.create( + name="greeting_prompt", + template="Hello, {name}!", + input_types={"name": "string"}, +) +print("Created prompt:", create_result) + +# Test 2: List prompts +print("\n=== Test 2: List Prompts ===") +list_result = client.prompts.list() +print("Prompts list:", list_result) + +# Test 3: Get specific prompt +print("\n=== Test 3: Get Prompt Details ===") +get_result = client.prompts.retrieve( + name="greeting_prompt", + inputs={"name": "John"}, + prompt_override="Hi, {name}!", +) +print("Prompt details:", get_result) + +# Test 4: Update prompt +print("\n=== Test 4: Update Prompt ===") +update_result = client.prompts.update( + name="greeting_prompt", + template="Greetings, {name}!", + input_types={"name": "string", "age": "integer"}, +) +print("Updated prompt:", update_result) + +# Test 5: Retrieve updated prompt +print("\n=== Test 5: Retrieve Updated Prompt ===") +get_updated_result = client.prompts.retrieve( + name="greeting_prompt", inputs={"name": "John", "age": 30} +) +print("Updated prompt details:", get_updated_result) + +# Test 6: Delete prompt +print("\n=== Test 6: Delete Prompt ===") +delete_result = client.prompts.delete(name="greeting_prompt") +print("Deleted prompt:", delete_result) + +# Test 7: Verify deletion by trying to get the prompt (should fail) +print("\n=== Test 7: Verify Deletion ===") +try: + client.prompts.retrieve(name="greeting_prompt") + print("ERROR: Prompt still exists!") +except Exception as e: + print("Successfully verified prompt deletion:", str(e)) diff --git a/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_retrieval.py 
b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_retrieval.py new file mode 100644 index 000000000..ca7151f89 --- /dev/null +++ b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_retrieval.py @@ -0,0 +1,21 @@ +from r2r import R2RClient + +user_email = "John.Doe1@email.com" + +client = R2RClient("http://localhost:7276", prefix="/v3") + +# Login +result = client.users.login( + email=user_email, password="new_secure_password123" +) +print("Login successful") + +# Test 1: Search retrieval +print("\n=== Test 1: Search Retrieval ===") +search_result = client.retrieval.search(query="whoami?") +print("Search results:", search_result) + +# Test 2: Another search retrieval (for demonstration) +print("\n=== Test 2: Another Search Retrieval ===") +search_result_2 = client.retrieval.search(query="what is the meaning of life?") +print("Search results 2:", search_result_2) diff --git a/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_users.py b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_users.py new file mode 100644 index 000000000..8f6615e10 --- /dev/null +++ b/py/core/examples/scripts/test_v3_sdk/test_v3_sdk_users.py @@ -0,0 +1,80 @@ +import random +import string + +from r2r import R2RClient + +# user_email = "John.Doe1@email.com" + + +# Function to generate a random email +def generate_random_email(): + username_length = 8 + username = "".join( + random.choices( + string.ascii_lowercase + string.digits, k=username_length + ) + ) + domain = random.choice( + ["example.com", "test.com", "fake.org", "random.net"] + ) + return f"{username}@{domain}" + + +user_email = generate_random_email() + +client = R2RClient("http://localhost:7276", prefix="/v3") + +# Test 1: Register user +print("\n=== Test 1: Register User ===") +register_result = client.users.register( + email=user_email, password="secure_password123" +) +print("Registered user:", register_result) + +# Test 2: Login user +print("\n=== Test 2: Login User ===") +login_result = client.users.login( + email=user_email, password="secure_password123" +) +print("Login result:", login_result) + +# Test 3: Refresh token +print("\n=== Test 3: Refresh Token ===") +refresh_result = client.users.refresh_token() +print("Refresh token result:", refresh_result) + +# Test 4: Change password +print("\n=== Test 4: Change Password ===") +change_password_result = client.users.change_password( + "secure_password123", "new_secure_password123" +) +print("Change password result:", change_password_result) + +# Test 5: Request password reset +print("\n=== Test 5: Request Password Reset ===") +reset_request_result = client.users.request_password_reset(email=user_email) +print("Password reset request result:", reset_request_result) + +# logout, to use super user +# Test 9: Logout user +print("\n=== Test 6: Logout User ===") +logout_result = client.users.logout() +print("Logout result:", logout_result) + +# Test 6: List users +print("\n=== Test 7: List Users ===") +users_list = client.users.list() +print("Users list:", users_list) + +# Test 7: Retrieve user +print("\n=== Test 8: Retrieve User ===") +user_id = users_list["results"][0][ + "user_id" +] # Assuming we have at least one user +user_details = client.users.retrieve(id=user_id) +print("User details:", user_details) + +# Test 8: Update user +print("\n=== Test 9: Update User ===") +update_result = client.users.update(user_id, name="Jane Doe") +print("Update user result:", update_result) diff --git a/py/core/main/__init__.py b/py/core/main/__init__.py index 7fb2dea34..d21c5a910 100644 --- a/py/core/main/__init__.py 
+++ b/py/core/main/__init__.py @@ -16,10 +16,6 @@ ## R2R API # Routes "AuthRouter", - "IngestionRouter", - "ManagementRouter", - "RetrievalRouter", - "KGRouter", ## R2R APP "R2RApp", ## R2R ASSEMBLY diff --git a/py/core/main/abstractions.py b/py/core/main/abstractions.py index 672e516b3..fac161701 100644 --- a/py/core/main/abstractions.py +++ b/py/core/main/abstractions.py @@ -17,11 +17,11 @@ PostgresDBProvider, R2RAuthProvider, R2RIngestionProvider, + SendGridEmailProvider, SimpleOrchestrationProvider, SqlitePersistentLoggingProvider, SupabaseAuthProvider, UnstructuredIngestionProvider, - SendGridEmailProvider, ) @@ -51,7 +51,7 @@ class R2RPipes(BaseModel): parsing_pipe: AsyncPipe embedding_pipe: AsyncPipe kg_search_pipe: AsyncPipe - kg_triples_extraction_pipe: AsyncPipe + kg_relationships_extraction_pipe: AsyncPipe kg_storage_pipe: AsyncPipe kg_entity_description_pipe: AsyncPipe kg_clustering_pipe: AsyncPipe diff --git a/py/core/main/api/__init__.py b/py/core/main/api/__init__.py deleted file mode 100644 index 6b50c605a..000000000 --- a/py/core/main/api/__init__.py +++ /dev/null @@ -1,16 +0,0 @@ -from .auth_router import AuthRouter -from .base_router import BaseRouter -from .ingestion_router import IngestionRouter -from .kg_router import KGRouter -from .management_router import ManagementRouter -from .retrieval_router import RetrievalRouter - -__all__ = [ - # Routes - "AuthRouter", - "IngestionRouter", - "ManagementRouter", - "RetrievalRouter", - "BaseRouter", - "KGRouter", -] diff --git a/py/core/main/api/data/ingestion_router_openapi.yml b/py/core/main/api/data/ingestion_router_openapi.yml deleted file mode 100644 index d494ca1f6..000000000 --- a/py/core/main/api/data/ingestion_router_openapi.yml +++ /dev/null @@ -1,207 +0,0 @@ -ingest_files: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) - - result = client.ingest_files( - file_paths=["pg_essay_1.html", "got.txt"], - metadatas=[{"metadata_1":"some random metadata"}, {"metadata_2": "some other random metadata"}], - document_ids=None - ) - - lang: Shell - source: | - curl -X POST "https://api.example.com/ingest_files" \ - -H "Content-Type: multipart/form-data" \ - -H "Authorization: Bearer YOUR_API_KEY" \ - -F "file=@pg_essay_1.html;type=text/html" \ - -F "file=@got.txt;type=text/plain" \ - -F 'metadatas=[{},{}]' \ - -F 'document_ids=null' - - input_descriptions: - files: "The list of paths of input files to ingest into the system." - metadatas: "An optional list of JSON metadata to affix to each file" - document_ids: "An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method." - ingestion_config: "An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration." - run_with_orchestration: "Whether or not ingestion runs with orchestration, default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result." - -update_files: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) 
- - result = client.update_files( - file_paths=["pg_essay_1_v2.txt"], - document_ids=["b4ac4dd6-5f27-596e-a55b-7cf242ca30aa"] - ) - - lang: Shell - source: | - curl -X POST "https://api.example.com/update_files" \ - -H "Content-Type: multipart/form-data" \ - -H "Authorization: Bearer YOUR_API_KEY" \ - -F "file=@pg_essay_1_v2.txt;type=text/plain" \ - -F 'document_ids=["b4ac4dd6-5f27-596e-a55b-7cf242ca30aa"]' - - input_descriptions: - files: "The list of paths of input files to update in the system." - document_ids: "An optional list of document ids for each file. If not provided, the system will attempt to generate the corresponding unique from the `generate_document_id` method." - metadatas: "An optional list of JSON metadata to affix to each file" - ingestion_config: "JSON string for chunking configuration override" - run_with_orchestration: "Whether or not ingestion runs with orchestration, default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result." - -ingest_chunks: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) - - result = client.ingest_chunks( - chunks=[ - { - "text": "Another chunk of text", - }, - { - "text": "Yet another chunk of text", - }, - { - "text": "A chunk of text", - }, - ], - ) - - lang: Shell - source: | - curl -X POST "https://api.example.com/ingest_chunks" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer YOUR_API_KEY" \ - -d '{ - "chunks": [ - { - "text": "Another chunk of text" - }, - { - "text": "Yet another chunk of text" - }, - { - "text": "A chunk of text" - } - ], - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "metadata": {} - }' - - input_descriptions: - chunks: "A list of text chunks to ingest into the system." - document_id: "An optional document id to associate the chunks with. If not provided, a unique document id will be generated." - metadata: "Optional JSON metadata to associate with the ingested chunks." - -list_vector_indices: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) - - result = client.list_vector_indices( - table_name="vectors", - concurrently=False - ) - - lang: Shell - source: | - curl -X GET "http://localhost:7276/v2/list_vector_indices" \ - -H "Content-Type: application/json" \ - -d '{ - "table_name": "vectors", - "concurrently": false - }' - - input_descriptions: - table_name: "The name of the table to list indices for. Options: vectors, entities_document, entities_collection, communities" - concurrently: "Whether to perform the operation concurrently" - -create_vector_index: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) 
- - result = client.create_vector_index( - table_name="vectors", - index_method="hnsw", - index_measure="cosine_distance", - index_arguments={"m": 16, "ef_construction": 64}, - concurrently=True - ) - - lang: Shell - source: | - curl -X POST "http://localhost:7276/v2/create_vector_index" \ - -H "Content-Type: application/json" \ - -d '{ - "table_name": "vectors", - "index_method": "hnsw", - "index_measure": "cosine_distance", - "index_arguments": { - "m": 16, - "ef_construction": 64 - }, - "concurrently": true - }' - - input_descriptions: - table_name: "The table to create the index on. Default: vectors" - index_method: "The indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw" - index_measure: "Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. Default: cosine_distance" - index_name: "Optional custom name for the index. If not provided, one will be auto-generated" - index_column: "The column containing the vectors to index. Default: `vec`, or `vec_binary` when using hamming or jaccard distance." - index_arguments: "Configuration parameters for the chosen index method. For HNSW: {m: int, ef_construction: int}. For IVFFlat: {n_lists: int}" - concurrently: "Whether to create the index concurrently. Default: true" - -delete_vector_index: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) - - result = client.delete_vector_index( - index_name="ix_vector_cosine_ops_hnsw__20241021211541", - table_name="vectors", - concurrently=True - ) - - lang: Shell - source: | - curl -X DELETE "http://localhost:7276/v2/delete_vector_index" \ - -H "Content-Type: application/json" \ - -d '{ - "index_name": "ix_vector_cosine_ops_hnsw__20241021211541", - "table_name": "vectors", - "concurrently": true - }' - - input_descriptions: - index_name: "The name of the index to delete" - table_name: "The name of the table containing the index. Default: vectors" - concurrently: "Whether to delete the index concurrently. Default: true" diff --git a/py/core/main/api/data/kg_router_openapi.yml b/py/core/main/api/data/kg_router_openapi.yml deleted file mode 100644 index bb92e6c1d..000000000 --- a/py/core/main/api/data/kg_router_openapi.yml +++ /dev/null @@ -1,41 +0,0 @@ -create_graph: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) - - result = client.create_graph( - document_ids = ["3e157b3a-8469-51db-90d9-52e7d896b49b"], # optional - ) - - lang: Shell - source: | - curl -X POST "https://api.example.com/create_graph" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer YOUR_API_KEY" \ - -d '{ - "document_ids": ["3e157b3a-8469-51db-90d9-52e7d896b49b"] - }' - - -enrich_graph: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient, KGEnrichmentSettings - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) 
- - settings = KGEnrichmentSettings() - - result = client.enrich_graph(settings) - - lang: Shell - source: | - curl -X POST "https://api.example.com/enrich_graph" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer YOUR_API_KEY" diff --git a/py/core/main/api/data/retrieval_router_openapi.yml b/py/core/main/api/data/retrieval_router_openapi.yml deleted file mode 100644 index 4add0a1bd..000000000 --- a/py/core/main/api/data/retrieval_router_openapi.yml +++ /dev/null @@ -1,225 +0,0 @@ -search: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) - - result = client.search( - query="Who is Aristotle?", - vector_search_settings={ - "use_vector_search": True, - "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}}, - "search_limit": 20, - "use_hybrid_search": True - }, - kg_search_settings={ - "use_kg_search": True, # graph needs to be constructed first - "kg_search_type": "local", - "kg_search_level": "0", - "generation_config": { - "model": "gpt-4o-mini", - "temperature": 0.7, - }, - "local_search_limits": { - "__Entity__": 20, - "__Relationship__": 20, - "__Community__": 20, - }, - "max_community_description_length": 65536, - "max_llm_queries_for_global_search": 250 - } - ) - - lang: Shell - source: | - curl -X POST "https://api.example.com/search" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer YOUR_API_KEY" \ - -d '{ - "query": "Who is Aristotle?", - "vector_search_settings": { - "use_vector_search": true, - "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}}, - "search_limit": 20, - "use_hybrid_search": true - }, - "kg_search_settings": { - "use_kg_search": true, # graph needs to be constructed first - "kg_search_type": "local", - "kg_search_level": "0", - "generation_config": { - "model": "gpt-4o-mini", - "temperature": 0.7 - }, - "local_search_limits": { - "__Entity__": 20, - "__Relationship__": 20, - "__Community__": 20, - }, - "max_community_description_length": 65536, - "max_llm_queries_for_global_search": 250 - } - }' - - input_descriptions: - query: "Search query" - vector_search_settings: "Vector search settings" - kg_search_settings: "Knowledge graph search settings" - -rag: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) 
- - result = client.rag( - query="Who is Aristotle?", - vector_search_settings={ - "use_vector_search": True, - "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}}, - "search_limit": 20, - "use_hybrid_search": True - }, - kg_search_settings={ - "use_kg_search": True, - "kg_search_type": "local", - "kg_search_level": "0", - "generation_config": { - "model": "gpt-4o-mini", - "temperature": 0.7, - }, - "local_search_limits": { - "__Entity__": 20, - "__Relationship__": 20, - "__Community__": 20, - }, - "max_community_description_length": 65536, - "max_llm_queries_for_global_search": 250 - }, - rag_generation_config={ - "stream": False, - "temperature": 0.7, - "max_tokens": 150 - } - ) - - lang: Shell - source: | - curl -X POST "https://api.example.com/rag" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer YOUR_API_KEY" \ - -d '{ - "query": "Who is Aristotle?", - "vector_search_settings": { - "use_vector_search": true, - "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}}, - "search_limit": 20, - "use_hybrid_search": True - }, - "kg_search_settings": { - "use_kg_search": true, # graph needs to be constructed first - "kg_search_type": "local", - "kg_search_level": "0", - "generation_config": { - "model": "gpt-4o-mini", - "temperature": 0.7 - }, - "local_search_limits": { - "__Entity__": 20, - "__Relationship__": 20, - "__Community__": 20, - }, - "max_community_description_length": 65536, - "max_llm_queries_for_global_search": 250 - }, - "rag_generation_config": { - "stream": false, - "temperature": 0.7, - "max_tokens": 150 - } - }' - - input_descriptions: - query: "RAG query" - vector_search_settings: "Vector search settings" - kg_search_settings: "Knowledge graph search settings" - rag_generation_config: "RAG generation configuration" - task_prompt_override: "Task prompt override" - strategy: "The RAG strategy to use (default | HyDE | RAG Fusion)" - include_title_if_available: "Includes document title in chunk response, if available." - search_strategy: "The RAG strategy to use (default | hyde | rag_fusion)" - -agent: - openapi_extra: - x-codeSamples: - - lang: Python - source: | - from r2r import R2RClient - - client = R2RClient("http://localhost:7272") - # when using auth, do client.login(...) 
- - result = client.agent( - messages=[ - {"role": "user", "content": "Who is the greatest philospher of all time?"}, - {"role": "assistant", "content": "Aristotle is widely considered the greatest philospher of all time."}, - {"role": "user", "content": "Can you tell me more about him?"} - ], - vector_search_settings={ - "use_vector_search": True, - "filters": {"document_id": {"$eq": "5e157b3a-8469-51db-90d9-52e7d896b49b"}}, - "search_limit": 20, - "use_hybrid_search": True - }, - rag_generation_config={ - "stream": False, - "temperature": 0.7, - "max_tokens": 200 - }, - include_title_if_available=True - ) - - lang: Shell - source: | - curl -X POST "https://api.example.com/agent" \ - -H "Content-Type: application/json" \ - -H "Authorization: Bearer YOUR_API_KEY" \ - -d '{ - "messages": [ - {"role": "user", "content": "Who is the greatest philospher of all time?"}, - {"role": "assistant", "content": "Aristotle is widely considered the greatest philospher of all time."}, - {"role": "user", "content": "Can you tell me more about him?"} - ], - "vector_search_settings": { - "use_vector_search": true, - "filters": {"document_id": {"$eq": "5e157b3a-8469-51db-90d9-52e7d896b49b"}}, - "search_limit": 20, - "use_hybrid_search": true - }, - "kg_search_settings": { - "use_kg_search": false # to enable this, please read the graphrag cookbook - }, - "rag_generation_config": { - "stream": false, - "temperature": 0.7, - "max_tokens": 200 - }, - "include_title_if_available": true - }' - - input_descriptions: - messages: "List of message objects (deprecated, use message instead)" - message: "Message object" - vector_search_settings: "Vector search settings" - kg_search_settings: "Knowledge graph search settings" - rag_generation_config: "RAG generation configuration" - task_prompt_override: "Task prompt override" - include_title_if_available: "Includes document title in chunk response, if available." 
- rag_strategy: "The RAG strategy to use (default | hyde | rag_fusion)" - conversation_id: "The ID of the conversation, a new ID is assigned if not provided" - branch_id: "The ID of the branch" diff --git a/py/core/main/api/kg_router.py b/py/core/main/api/kg_router.py deleted file mode 100644 index 1f7c4bfcd..000000000 --- a/py/core/main/api/kg_router.py +++ /dev/null @@ -1,502 +0,0 @@ -import logging -from pathlib import Path -from typing import Optional, Union -from uuid import UUID - -import yaml -from fastapi import Body, Depends, Query - -from core.base import Workflow -from core.base.abstractions import EntityLevel, KGRunType -from core.base.api.models import ( - WrappedKGCommunitiesResponse, - WrappedKGCreationResponse, - WrappedKGEnrichmentResponse, - WrappedKGEntitiesResponse, - WrappedKGEntityDeduplicationResponse, - WrappedKGTriplesResponse, - WrappedKGTunePromptResponse, -) -from core.base.logger.base import RunType -from core.providers import ( - HatchetOrchestrationProvider, - SimpleOrchestrationProvider, -) -from core.utils import ( - generate_default_user_collection_id, - update_settings_from_dict, -) - -from ..services.kg_service import KgService -from .base_router import BaseRouter - -logger = logging.getLogger() - - -class KGRouter(BaseRouter): - def __init__( - self, - service: KgService, - orchestration_provider: Optional[ - Union[HatchetOrchestrationProvider, SimpleOrchestrationProvider] - ] = None, - run_type: RunType = RunType.KG, - ): - if not orchestration_provider: - raise ValueError("KGRouter requires an orchestration provider.") - super().__init__(service, orchestration_provider, run_type) - self.service: KgService = service - - def _load_openapi_extras(self): - yaml_path = Path(__file__).parent / "data" / "kg_router_openapi.yml" - with open(yaml_path, "r") as yaml_file: - yaml_content = yaml.safe_load(yaml_file) - return yaml_content - - def _register_workflows(self): - - workflow_messages = {} - if self.orchestration_provider.config.provider == "hatchet": - workflow_messages["create-graph"] = ( - "Graph creation task queued successfully." - ) - workflow_messages["enrich-graph"] = ( - "Graph enrichment task queued successfully." - ) - workflow_messages["entity-deduplication"] = ( - "KG Entity Deduplication task queued successfully." - ) - else: - workflow_messages["create-graph"] = ( - "Graph created successfully, please run enrich-graph to enrich the graph for GraphRAG." - ) - workflow_messages["enrich-graph"] = ( - "Graph enriched successfully. You can view the communities at http://localhost:7272/v2/communities" - ) - workflow_messages["entity-deduplication"] = ( - "KG Entity Deduplication completed successfully." - ) - - self.orchestration_provider.register_workflows( - Workflow.KG, - self.service, - workflow_messages, - ) - - def _setup_routes(self): - @self.router.post( - "/create_graph", - ) - @self.base_endpoint - async def create_graph( - collection_id: Optional[UUID] = Body( - default=None, - description="Collection ID to create graph for.", - ), - run_type: Optional[KGRunType] = Body( - default=None, - description="Run type for the graph creation process.", - ), - kg_creation_settings: Optional[dict] = Body( - default=None, - description="Settings for the graph creation process.", - ), - run_with_orchestration: Optional[bool] = Body(True), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ): # -> WrappedKGCreationResponse: # type: ignore - """ - Creating a graph on your documents. 
This endpoint takes as input a list of document IDs and KGCreationSettings.
- If document IDs are not provided, the graph will be created on all documents in the system.
- This step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.
- In order to do GraphRAG, you will need to run the enrich_graph endpoint.
- """
- if not auth_user.is_superuser:
- logger.warning("Implement permission checks here.")
- 
- logger.info(f"Running create-graph on collection {collection_id}")
- 
- # If no collection ID is provided, use the default user collection
- if not collection_id:
- collection_id = generate_default_user_collection_id(
- auth_user.id
- )
- 
- # If no run type is provided, default to estimate
- if not run_type:
- run_type = KGRunType.ESTIMATE
- 
- # Apply runtime settings overrides
- server_kg_creation_settings = (
- self.service.providers.database.config.kg_creation_settings
- )
- 
- if kg_creation_settings:
- server_kg_creation_settings = update_settings_from_dict(
- server_kg_creation_settings, kg_creation_settings
- )
- 
- # If the run type is estimate, return an estimate of the creation cost
- if run_type is KGRunType.ESTIMATE:
- return await self.service.get_creation_estimate(
- collection_id, server_kg_creation_settings
- )
- else:
- 
- # Otherwise, create the graph
- if run_with_orchestration:
- workflow_input = {
- "collection_id": str(collection_id),
- "kg_creation_settings": server_kg_creation_settings.model_dump_json(),
- "user": auth_user.json(),
- }
- 
- return await self.orchestration_provider.run_workflow( # type: ignore
- "create-graph", {"request": workflow_input}, {}
- )
- else:
- from core.main.orchestration import simple_kg_factory
- 
- logger.info("Running create-graph without orchestration.")
- simple_kg = simple_kg_factory(self.service)
- await simple_kg["create-graph"](workflow_input)
- return {
- "message": "Graph created successfully.",
- "task_id": None,
- }
- 
- @self.router.post(
- "/enrich_graph",
- )
- @self.base_endpoint
- async def enrich_graph(
- collection_id: Optional[UUID] = Body(
- default=None,
- description="Collection ID to enrich graph for.",
- ),
- run_type: Optional[KGRunType] = Body(
- default=KGRunType.ESTIMATE,
- description="Run type for the graph enrichment process.",
- ),
- kg_enrichment_settings: Optional[dict] = Body(
- default=None,
- description="Settings for the graph enrichment process.",
- ),
- run_with_orchestration: Optional[bool] = Body(True),
- auth_user=Depends(self.service.providers.auth.auth_wrapper),
- ): # -> WrappedKGEnrichmentResponse:
- """
- This endpoint enriches the graph with additional information.
- It creates communities of nodes based on their similarity and adds embeddings to the graph.
- This step is necessary for GraphRAG to work.
- """ - if not auth_user.is_superuser: - logger.warning("Implement permission checks here.") - - # If no collection ID is provided, use the default user collection - if not collection_id: - collection_id = generate_default_user_collection_id( - auth_user.id - ) - - # If no run type is provided, default to estimate - if not run_type: - run_type = KGRunType.ESTIMATE - - # Apply runtime settings overrides - server_kg_enrichment_settings = ( - self.service.providers.database.config.kg_enrichment_settings - ) - if kg_enrichment_settings: - server_kg_enrichment_settings = update_settings_from_dict( - server_kg_enrichment_settings, kg_enrichment_settings - ) - - # If the run type is estimate, return an estimate of the enrichment cost - if run_type is KGRunType.ESTIMATE: - return await self.service.get_enrichment_estimate( - collection_id, server_kg_enrichment_settings - ) - - # Otherwise, run the enrichment workflow - else: - if run_with_orchestration: - workflow_input = { - "collection_id": str(collection_id), - "kg_enrichment_settings": server_kg_enrichment_settings.model_dump_json(), - "user": auth_user.json(), - } - - return await self.orchestration_provider.run_workflow( # type: ignore - "enrich-graph", {"request": workflow_input}, {} - ) - else: - from core.main.orchestration import simple_kg_factory - - logger.info("Running enrich-graph without orchestration.") - simple_kg = simple_kg_factory(self.service) - await simple_kg["enrich-graph"](workflow_input) - return { - "message": "Graph enriched successfully.", - "task_id": None, - } - - @self.router.get("/entities") - @self.base_endpoint - async def get_entities( - collection_id: Optional[UUID] = Query( - None, description="Collection ID to retrieve entities from." - ), - entity_level: Optional[EntityLevel] = Query( - default=EntityLevel.DOCUMENT, - description="Type of entities to retrieve. Options are: raw, dedup_document, dedup_collection.", - ), - entity_ids: Optional[list[str]] = Query( - None, description="Entity IDs to filter by." - ), - offset: int = Query(0, ge=0, description="Offset for pagination."), - limit: int = Query( - 100, - ge=-1, - description="Number of items to return. Use -1 to return all items.", - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedKGEntitiesResponse: - """ - Retrieve entities from the knowledge graph. - """ - if not auth_user.is_superuser: - logger.warning("Implement permission checks here.") - - if not collection_id: - collection_id = generate_default_user_collection_id( - auth_user.id - ) - - if entity_level == EntityLevel.CHUNK: - entity_table_name = "chunk_entity" - elif entity_level == EntityLevel.DOCUMENT: - entity_table_name = "document_entity" - else: - entity_table_name = "collection_entity" - - return await self.service.get_entities( - collection_id, - entity_ids, - entity_table_name, - offset, - limit, - ) - - @self.router.get("/triples") - @self.base_endpoint - async def get_triples( - collection_id: Optional[UUID] = Query( - None, description="Collection ID to retrieve triples from." - ), - entity_names: Optional[list[str]] = Query( - None, description="Entity names to filter by." - ), - triple_ids: Optional[list[str]] = Query( - None, description="Triple IDs to filter by." - ), - offset: int = Query(0, ge=0, description="Offset for pagination."), - limit: int = Query( - 100, - ge=-1, - description="Number of items to return. 
Use -1 to return all items.", - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedKGTriplesResponse: - """ - Retrieve triples from the knowledge graph. - """ - if not auth_user.is_superuser: - logger.warning("Implement permission checks here.") - - if not collection_id: - collection_id = generate_default_user_collection_id( - auth_user.id - ) - - return await self.service.get_triples( - collection_id, - entity_names, - triple_ids, - offset, - limit, - ) - - @self.router.get("/communities") - @self.base_endpoint - async def get_communities( - collection_id: Optional[UUID] = Query( - None, description="Collection ID to retrieve communities from." - ), - levels: Optional[list[int]] = Query( - None, description="Levels to filter by." - ), - community_numbers: Optional[list[int]] = Query( - None, description="Community numbers to filter by." - ), - offset: int = Query(0, ge=0, description="Offset for pagination."), - limit: int = Query( - 100, - ge=-1, - description="Number of items to return. Use -1 to return all items.", - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedKGCommunitiesResponse: - """ - Retrieve communities from the knowledge graph. - """ - if not auth_user.is_superuser: - logger.warning("Implement permission checks here.") - - if not collection_id: - collection_id = generate_default_user_collection_id( - auth_user.id - ) - - return await self.service.get_communities( - collection_id, - levels, - community_numbers, - offset, - limit, - ) - - @self.router.post("/deduplicate_entities") - @self.base_endpoint - async def deduplicate_entities( - collection_id: Optional[UUID] = Body( - None, description="Collection ID to deduplicate entities for." - ), - run_type: Optional[KGRunType] = Body( - None, description="Run type for the deduplication process." - ), - deduplication_settings: Optional[dict] = Body( - None, description="Settings for the deduplication process." - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedKGEntityDeduplicationResponse: - """ - Deduplicate entities in the knowledge graph. - """ - if not auth_user.is_superuser: - logger.warning("Implement permission checks here.") - - if not collection_id: - collection_id = generate_default_user_collection_id( - auth_user.id - ) - - if not run_type: - run_type = KGRunType.ESTIMATE - - server_deduplication_settings = ( - self.service.providers.database.config.kg_entity_deduplication_settings - ) - - logger.info( - f"Server deduplication settings: {server_deduplication_settings}" - ) - - if deduplication_settings: - server_deduplication_settings = update_settings_from_dict( - server_deduplication_settings, deduplication_settings - ) - - logger.info( - f"Running deduplicate_entities on collection {collection_id}" - ) - logger.info(f"Input data: {server_deduplication_settings}") - - if run_type == KGRunType.ESTIMATE: - return await self.service.get_deduplication_estimate( - collection_id, server_deduplication_settings - ) - - workflow_input = { - "collection_id": str(collection_id), - "run_type": run_type, - "kg_entity_deduplication_settings": server_deduplication_settings.model_dump_json(), - "user": auth_user.json(), - } - - return await self.orchestration_provider.run_workflow( # type: ignore - "entity-deduplication", {"request": workflow_input}, {} - ) - - @self.router.get("/tuned_prompt") - @self.base_endpoint - async def get_tuned_prompt( - prompt_name: str = Query( - ..., - description="The name of the prompt to tune. 
Valid options are 'kg_triples_extraction_prompt', 'kg_entity_description_prompt' and 'community_reports_prompt'.", - ), - collection_id: Optional[UUID] = Query( - None, description="Collection ID to retrieve communities from." - ), - documents_offset: Optional[int] = Query( - 0, description="Offset for document pagination." - ), - documents_limit: Optional[int] = Query( - 100, description="Limit for document pagination." - ), - chunks_offset: Optional[int] = Query( - 0, description="Offset for chunk pagination." - ), - chunks_limit: Optional[int] = Query( - 100, description="Limit for chunk pagination." - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedKGTunePromptResponse: - """ - Auto-tune the prompt for a specific collection. - """ - if not auth_user.is_superuser: - logger.warning("Implement permission checks here.") - - if not collection_id: - collection_id = generate_default_user_collection_id( - auth_user.id - ) - - return await self.service.tune_prompt( - prompt_name=prompt_name, - collection_id=collection_id, - documents_offset=documents_offset, - documents_limit=documents_limit, - chunks_offset=chunks_offset, - chunks_limit=chunks_limit, - ) - - @self.router.delete("/delete_graph_for_collection") - @self.base_endpoint - async def delete_graph_for_collection( - collection_id: UUID = Body( # FIXME: This should be a path parameter - ..., description="Collection ID to delete graph for." - ), - cascade: bool = Body( # FIXME: This should be a query parameter - default=False, - description="Whether to cascade the deletion, and delete entities and triples belonging to the collection.", - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ): - """ - Delete the graph for a given collection. Note that this endpoint may delete a large amount of data created by the KG pipeline, this deletion is irreversible, and recreating the graph may be an expensive operation. - - Notes: - The endpoint deletes all communities for a given collection. If the cascade flag is set to true, the endpoint also deletes all the entities and triples associated with the collection. - - WARNING: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection. 
- - """ - if not auth_user.is_superuser: - logger.warning("Implement permission checks here.") - - await self.service.delete_graph_for_collection( - collection_id, cascade - ) - - return {"message": "Graph deleted successfully."} diff --git a/py/core/main/api/management_router.py b/py/core/main/api/management_router.py deleted file mode 100644 index 1116cb7df..000000000 --- a/py/core/main/api/management_router.py +++ /dev/null @@ -1,966 +0,0 @@ -# TODO - Cleanup the handling for non-auth configurations -import json -import mimetypes -import os -from datetime import datetime, timezone -from typing import Optional, Set, Union -from uuid import UUID - -import psutil -from fastapi import Body, Depends, Path, Query -from fastapi.responses import StreamingResponse -from pydantic import Json - -from core.base import Message, R2RException -from core.base.api.models import ( - WrappedAddUserResponse, - WrappedAnalyticsResponse, - WrappedAppSettingsResponse, - WrappedCollectionListResponse, - WrappedCollectionOverviewResponse, - WrappedCollectionResponse, - WrappedConversationResponse, - WrappedConversationsOverviewResponse, - WrappedDeleteResponse, - WrappedDocumentChunkResponse, - WrappedDocumentOverviewResponse, - WrappedGetPromptsResponse, - WrappedLogResponse, - WrappedPromptMessageResponse, - WrappedServerStatsResponse, - WrappedUserCollectionResponse, - WrappedUserOverviewResponse, - WrappedUsersInCollectionResponse, -) -from core.base.logger import AnalysisTypes, LogFilterCriteria -from core.providers import ( - HatchetOrchestrationProvider, - SimpleOrchestrationProvider, -) - -from ...base.logger.base import RunType -from ..services.management_service import ManagementService -from .base_router import BaseRouter - - -class ManagementRouter(BaseRouter): - def __init__( - self, - service: ManagementService, - orchestration_provider: Union[ - HatchetOrchestrationProvider, SimpleOrchestrationProvider - ], - run_type: RunType = RunType.MANAGEMENT, - ): - super().__init__(service, orchestration_provider, run_type) - self.service: ManagementService = service # for type hinting - self.start_time = datetime.now(timezone.utc) - - # TODO: remove this from the management route, it should be at the base of the server - def _setup_routes(self): - @self.router.get("/health") - @self.base_endpoint - async def health_check(): - return {"response": "ok"} - - @self.router.get("/server_stats") - @self.base_endpoint - async def server_stats( - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedServerStatsResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only an authorized user can call the `server_stats` endpoint.", - 403, - ) - return { # type: ignore - "start_time": self.start_time.isoformat(), - "uptime_seconds": ( - datetime.now(timezone.utc) - self.start_time - ).total_seconds(), - "cpu_usage": psutil.cpu_percent(), - "memory_usage": psutil.virtual_memory().percent, - } - - @self.router.post("/update_prompt") - @self.base_endpoint - async def update_prompt_app( - name: str = Body(..., description="Prompt name"), - template: Optional[str] = Body( - None, description="Prompt template" - ), - input_types: Optional[dict[str, str]] = Body( - {}, description="Input types" - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedPromptMessageResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the `update_prompt` endpoint.", - 403, - ) - - result = await self.service.update_prompt( - name, template, 
input_types - ) - return result # type: ignore - - @self.router.post("/add_prompt") - @self.base_endpoint - async def add_prompt_app( - name: str = Body(..., description="Prompt name"), - template: str = Body(..., description="Prompt template"), - input_types: dict[str, str] = Body({}, description="Input types"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedPromptMessageResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the `add_prompt` endpoint.", - 403, - ) - result = await self.service.add_prompt(name, template, input_types) - return result # type: ignore - - @self.router.get("/get_prompt/{prompt_name}") - @self.base_endpoint - async def get_prompt_app( - prompt_name: str = Path(..., description="Prompt name"), - inputs: Optional[Json[dict]] = Query( - None, description="JSON-encoded prompt inputs" - ), - prompt_override: Optional[str] = Query( - None, description="Prompt override" - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedPromptMessageResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the `get_prompt` endpoint.", - 403, - ) - result = await self.service.get_prompt( - prompt_name, inputs, prompt_override - ) - return result # type: ignore - - @self.router.get("/get_all_prompts") - @self.base_endpoint - async def get_all_prompts_app( - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedGetPromptsResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the `get_all_prompts` endpoint.", - 403, - ) - result = await self.service.get_all_prompts() - return {"prompts": result} # type: ignore - - @self.router.delete("/delete_prompt/{prompt_name}") - @self.base_endpoint - async def delete_prompt_app( - prompt_name: str = Path(..., description="Prompt name"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDeleteResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the `delete_prompt` endpoint.", - 403, - ) - await self.service.delete_prompt(prompt_name) - return None # type: ignore - - @self.router.get("/analytics") - @self.base_endpoint - async def get_analytics_app( - filter_criteria: Optional[Json[dict]] = Query({}), - analysis_types: Optional[Json[dict]] = Query({}), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedAnalyticsResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the `analytics` endpoint.", 403 - ) - - try: - result = await self.service.analytics( - filter_criteria=LogFilterCriteria(filters=filter_criteria), - analysis_types=AnalysisTypes( - analysis_types=analysis_types - ), - ) - return result - except json.JSONDecodeError as e: - raise R2RException( - f"Invalid JSON in query parameters: {str(e)}", 400 - ) - except ValueError as e: - raise R2RException( - f"Invalid data in query parameters: {str(e)}", 400 - ) - - # TODO: should we add a message to the response model with warnings i.e. if the max_runs passed in violates the max_runs limit? 
- @self.router.get("/logs") - @self.base_endpoint - async def logs_app( - run_type_filter: Optional[str] = Query(""), - offset: int = Query(0, ge=0), - limit: int = Query(100, ge=1, le=1000), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedLogResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the `logs` endpoint.", 403 - ) - - return await self.service.logs( - run_type_filter=run_type_filter, - offset=offset, - limit=limit, - ) - - @self.router.get("/app_settings") - @self.base_endpoint - async def app_settings( - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedAppSettingsResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the `app_settings` endpoint.", - 403, - ) - return await self.service.app_settings() - - @self.router.get("/users_overview") - @self.base_endpoint - async def users_overview_app( - user_ids: Optional[list[str]] = Query([]), - offset: int = Query(0, ge=0), - limit: int = Query(100, ge=1, le=1000), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedUserOverviewResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the `users_overview` endpoint.", - 403, - ) - - user_uuids = ( - [UUID(user_id) for user_id in user_ids] if user_ids else None - ) - - users_overview_response = await self.service.users_overview( - user_ids=user_uuids, offset=offset, limit=limit - ) - - return users_overview_response["results"], { # type: ignore - "total_entries": users_overview_response["total_entries"] - } - - @self.router.delete("/delete", status_code=204) - @self.base_endpoint - async def delete_app( - filters: str = Query(..., description="JSON-encoded filters"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ): - try: - filters_dict = json.loads(filters) - except json.JSONDecodeError: - raise R2RException( - status_code=422, message="Invalid JSON in filters" - ) - - if not isinstance(filters_dict, dict): - raise R2RException( - status_code=422, message="Filters must be a JSON object" - ) - - for key, value in filters_dict.items(): - if not isinstance(value, dict): - raise R2RException( - status_code=422, - message=f"Invalid filter format for key: {key}", - ) - - return await self.service.delete(filters=filters_dict) - - @self.router.get( - "/download_file/{document_id}", response_class=StreamingResponse - ) - @self.base_endpoint - async def download_file_app( - document_id: str = Path(..., description="Document ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ): - """ - Download a file by its document ID as a stream. - """ - # TODO: Add a check to see if the user has access to the file - - try: - document_uuid = UUID(document_id) - except ValueError: - raise R2RException( - status_code=422, message="Invalid document ID format." 
- ) - - file_tuple = await self.service.download_file(document_uuid) - if not file_tuple: - raise R2RException(status_code=404, message="File not found.") - - file_name, file_content, file_size = file_tuple - - mime_type, _ = mimetypes.guess_type(file_name) - if not mime_type: - mime_type = "application/octet-stream" - - async def file_stream(): - chunk_size = 1024 * 1024 # 1MB - while True: - data = file_content.read(chunk_size) - if not data: - break - yield data - - return StreamingResponse( # type: ignore - file_stream(), - media_type=mime_type, - headers={ - "Content-Disposition": f'inline; filename="{file_name}"', - "Content-Length": str(file_size), - }, - ) - - @self.router.get("/documents_overview") - @self.base_endpoint - async def documents_overview_app( - document_ids: list[str] = Query([]), - offset: int = Query(0, ge=0), - limit: int = Query( - 1_000, - ge=-1, - description="Number of items to return. Use -1 to return all items.", - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDocumentOverviewResponse: - request_user_ids = ( - None if auth_user.is_superuser else [auth_user.id] - ) - - filter_collection_ids = ( - None if auth_user.is_superuser else auth_user.collection_ids - ) - - document_uuids = [ - UUID(document_id) for document_id in document_ids - ] - documents_overview_response = ( - await self.service.documents_overview( - user_ids=request_user_ids, - collection_ids=filter_collection_ids, - document_ids=document_uuids, - offset=offset, - limit=limit, - ) - ) - return documents_overview_response["results"], { # type: ignore - "total_entries": documents_overview_response["total_entries"] - } - - @self.router.get("/document_chunks/{document_id}") - @self.base_endpoint - async def document_chunks_app( - document_id: str = Path(...), - offset: Optional[int] = Query(0, ge=0), - limit: Optional[int] = Query(100, ge=0), - include_vectors: Optional[bool] = Query(False), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDocumentChunkResponse: - document_uuid = UUID(document_id) - - document_chunks = await self.service.document_chunks( - document_uuid, offset, limit, include_vectors - ) - - document_chunks_result = document_chunks["results"] - - if not document_chunks_result: - raise R2RException( - "No chunks found for the given document ID.", - 404, - ) - - is_owner = str(document_chunks_result[0].get("user_id")) == str( - auth_user.id - ) - document_collections = await self.service.document_collections( - document_uuid, 0, -1 - ) - - user_has_access = ( - is_owner - or set(auth_user.collection_ids).intersection( - set( - [ - ele.collection_id - for ele in document_collections["results"] - ] - ) - ) - != set() - ) - - if not user_has_access and not auth_user.is_superuser: - raise R2RException( - "Only a superuser can arbitrarily call document_chunks.", - 403, - ) - - return document_chunks_result, { # type: ignore - "total_entries": document_chunks["total_entries"] - } - - @self.router.get("/collections_overview") - @self.base_endpoint - async def collections_overview_app( - collection_ids: Optional[list[str]] = Query(None), - offset: Optional[int] = Query(0, ge=0), - limit: Optional[int] = Query(100, ge=1, le=1000), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionOverviewResponse: - user_collections: Optional[Set[UUID]] = ( - None - if auth_user.is_superuser - else {UUID(str(cid)) for cid in auth_user.collection_ids} - ) - - filtered_collections: Optional[Set[UUID]] = None - - if 
collection_ids: - input_collections = {UUID(cid) for cid in collection_ids} - if user_collections is not None: - filtered_collections = input_collections.intersection( - user_collections - ) - else: - filtered_collections = input_collections - else: - filtered_collections = user_collections - - collections_overview_response = ( - await self.service.collections_overview( - collection_ids=( - [str(cid) for cid in filtered_collections] - if filtered_collections is not None - else None - ), - offset=offset, - limit=limit, - ) - ) - - return collections_overview_response["results"], { # type: ignore - "total_entries": collections_overview_response["total_entries"] - } - - @self.router.post("/create_collection") - @self.base_endpoint - async def create_collection_app( - name: str = Body(..., description="Collection name"), - description: Optional[str] = Body( - "", description="Collection description" - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionResponse: - collection_id = await self.service.create_collection( - name, description - ) - await self.service.add_user_to_collection( # type: ignore - auth_user.id, collection_id.collection_id - ) - return collection_id - - @self.router.get("/get_collection/{collection_id}") - @self.base_endpoint - async def get_collection_app( - collection_id: str = Path(..., description="Collection ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionResponse: - collection_uuid = UUID(collection_id) - if ( - not auth_user.is_superuser - and collection_uuid not in auth_user.collection_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified collection.", - 403, - ) - - result = await self.service.get_collection(collection_uuid) - return result # type: ignore - - @self.router.put("/update_collection") - @self.base_endpoint - async def update_collection_app( - collection_id: str = Body(..., description="Collection ID"), - name: Optional[str] = Body( - None, description="Updated collection name" - ), - description: Optional[str] = Body( - None, description="Updated collection description" - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionResponse: - collection_uuid = UUID(collection_id) - if ( - not auth_user.is_superuser - and collection_uuid not in auth_user.collection_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified collection.", - 403, - ) - - return await self.service.update_collection( # type: ignore - collection_uuid, name, description - ) - - @self.router.delete("/delete_collection/{collection_id}") - @self.base_endpoint - async def delete_collection_app( - collection_id: str = Path(..., description="Collection ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDeleteResponse: - collection_uuid = UUID(collection_id) - if ( - not auth_user.is_superuser - and collection_uuid not in auth_user.collection_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified collection.", - 403, - ) - await self.service.delete_collection(collection_uuid) - return None # type: ignore - - @self.router.get("/list_collections") - @self.base_endpoint - async def list_collections_app( - offset: int = Query(0, ge=0), - limit: int = Query(100, ge=1, le=1000), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionListResponse: - if not 
auth_user.is_superuser: - raise R2RException( - "Only a superuser can call the list collections endpoint.", - 403, - ) - list_collections_response = await self.service.list_collections( - offset=offset, limit=min(max(limit, 1), 1000) - ) - - return list_collections_response["results"], { # type: ignore - "total_entries": list_collections_response["total_entries"] - } - - @self.router.post("/add_user_to_collection") - @self.base_endpoint - async def add_user_to_collection_app( - user_id: str = Body(..., description="User ID"), - collection_id: str = Body(..., description="Collection ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedAddUserResponse: - collection_uuid = UUID(collection_id) - user_uuid = UUID(user_id) - if ( - not auth_user.is_superuser - and collection_uuid not in auth_user.collection_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified collection.", - 403, - ) - - result = await self.service.add_user_to_collection( - user_uuid, collection_uuid - ) - return result # type: ignore - - @self.router.post("/remove_user_from_collection") - @self.base_endpoint - async def remove_user_from_collection_app( - user_id: str = Body(..., description="User ID"), - collection_id: str = Body(..., description="Collection ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ): - collection_uuid = UUID(collection_id) - user_uuid = UUID(user_id) - if ( - not auth_user.is_superuser - and collection_uuid not in auth_user.collection_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified collection.", - 403, - ) - - await self.service.remove_user_from_collection( - user_uuid, collection_uuid - ) - return None # type: ignore - - # TODO - Proivde response model - @self.router.get("/get_users_in_collection/{collection_id}") - @self.base_endpoint - async def get_users_in_collection_app( - collection_id: str = Path(..., description="Collection ID"), - offset: int = Query(0, ge=0, description="Pagination offset"), - limit: int = Query( - 100, ge=1, le=1000, description="Pagination limit" - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedUsersInCollectionResponse: - collection_uuid = UUID(collection_id) - if ( - not auth_user.is_superuser - and collection_uuid not in auth_user.collection_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified collection.", - 403, - ) - - users_in_collection_response = ( - await self.service.get_users_in_collection( - collection_id=collection_uuid, - offset=offset, - limit=min(max(limit, 1), 1000), - ) - ) - - return users_in_collection_response["results"], { # type: ignore - "total_entries": users_in_collection_response["total_entries"] - } - - @self.router.get("/user_collections/{user_id}") - @self.base_endpoint - async def get_collections_for_user_app( - user_id: str = Path(..., description="User ID"), - offset: int = Query(0, ge=0, description="Pagination offset"), - limit: int = Query( - 100, ge=1, le=1000, description="Pagination limit" - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedUserCollectionResponse: - if str(auth_user.id) != user_id and not auth_user.is_superuser: - raise R2RException( - "The currently authenticated user does not have access to the specified collection.", - 403, - ) - user_uuid = UUID(user_id) - user_collection_response = ( - await self.service.get_collections_for_user( - 
user_uuid, offset, limit - ) - ) - - return user_collection_response["results"], { # type: ignore - "total_entries": user_collection_response["total_entries"] - } - - @self.router.post("/assign_document_to_collection") - @self.base_endpoint - async def assign_document_to_collection_app( - document_id: str = Body(..., description="Document ID"), - collection_id: str = Body(..., description="Collection ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ): - collection_uuid = UUID(collection_id) - document_uuid = UUID(document_id) - if ( - not auth_user.is_superuser - and collection_uuid not in auth_user.collection_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified collection.", - 403, - ) - - return await self.service.assign_document_to_collection( # type: ignore - document_uuid, collection_uuid - ) - - @self.router.post("/remove_document_from_collection") - @self.base_endpoint - async def remove_document_from_collection_app( - document_id: str = Body(..., description="Document ID"), - collection_id: str = Body(..., description="Collection ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDeleteResponse: - collection_uuid = UUID(collection_id) - document_uuid = UUID(document_id) - if ( - not auth_user.is_superuser - and collection_uuid not in auth_user.collection_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified collection.", - 403, - ) - - return await self.service.remove_document_from_collection( - document_uuid, collection_uuid - ) - - @self.router.get("/document_collections/{document_id}") - @self.base_endpoint - async def document_collections_app( - document_id: str = Path(..., description="Document ID"), - offset: int = Query(0, ge=0), - limit: int = Query(100, ge=1, le=1000), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionListResponse: - if not auth_user.is_superuser: - raise R2RException( - "Only a superuser can get the collections belonging to a document.", - 403, - ) - document_collections_response = ( - await self.service.document_collections( - document_id, offset, limit - ) - ) - - return document_collections_response["results"], { # type: ignore - "total_entries": document_collections_response["total_entries"] - } - - @self.router.get("/collection/{collection_id}/documents") - @self.base_endpoint - async def documents_in_collection_app( - collection_id: str = Path(..., description="Collection ID"), - offset: int = Query(0, ge=0), - limit: int = Query(100, ge=1, le=1000), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDocumentOverviewResponse: - collection_uuid = UUID(collection_id) - if ( - not auth_user.is_superuser - and collection_uuid not in auth_user.collection_ids - ): - raise R2RException( - "The currently authenticated user does not have access to the specified collection.", - 403, - ) - - documents_in_collection_response = ( - await self.service.documents_in_collection( - collection_uuid, offset, limit - ) - ) - - return documents_in_collection_response["results"], { # type: ignore - "total_entries": documents_in_collection_response[ - "total_entries" - ] - } - - @self.router.get("/conversations_overview") - @self.base_endpoint - async def conversations_overview_app( - conversation_ids: list[str] = Query([]), - user_ids: Optional[list[str]] = Query(None), - offset: int = Query(0, ge=0), - limit: int = Query(100, ge=-1, le=1000), - 
auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedConversationsOverviewResponse: - conversation_uuids = [ - UUID(conversation_id) for conversation_id in conversation_ids - ] - - if auth_user.is_superuser: - user_ids = [UUID(uid) for uid in user_ids] if user_ids else None # type: ignore - else: - if user_ids: - raise R2RException( - message="Non-superusers cannot query other users' conversations", - status_code=403, - ) - user_ids = [auth_user.id] - - conversations_overview_response = ( - await self.service.conversations_overview( - user_ids=user_ids, - conversation_ids=conversation_uuids, - offset=offset, - limit=limit, - ) - ) - - return conversations_overview_response["results"], { # type: ignore - "total_entries": conversations_overview_response[ - "total_entries" - ] - } - - @self.router.get("/get_conversation/{conversation_id}") - @self.base_endpoint - async def get_conversation( - conversation_id: str = Path(..., description="Conversation ID"), - branch_id: str = Query(None, description="Branch ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedConversationResponse: - - if not auth_user.is_superuser: - has_access = await self.service.verify_conversation_access( - conversation_id, auth_user.id - ) - if not has_access: - raise R2RException( - message="You do not have access to this conversation", - status_code=403, - ) - - result = await self.service.get_conversation( - conversation_id, - branch_id, - ) - return result - - @self.router.post("/create_conversation") - @self.base_endpoint - async def create_conversation( - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> dict: - return await self.service.create_conversation( - user_id=auth_user.id if auth_user else None - ) - - @self.router.post("/add_message/{conversation_id}") - @self.base_endpoint - async def add_message( - conversation_id: str = Path(..., description="Conversation ID"), - message: Message = Body(..., description="Message content"), - parent_id: Optional[str] = Body( - None, description="Parent message ID" - ), - metadata: Optional[dict] = Body(None, description="Metadata"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> dict: - if not auth_user.is_superuser: - has_access = await self.service.verify_conversation_access( - conversation_id, auth_user.id - ) - if not has_access: - raise R2RException( - message="You do not have access to this conversation", - status_code=403, - ) - - message_id = await self.service.add_message( - conversation_id, message, parent_id, metadata - ) - return {"message_id": message_id} - - @self.router.put("/update_message/{message_id}") - @self.base_endpoint - async def edit_message( - message_id: str = Path(..., description="Message ID"), - message: str = Body(..., description="New content"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> dict: - # TODO: Add a check to see if the user has access to the message - - new_message_id, new_branch_id = await self.service.edit_message( - message_id, message - ) - return { - "new_message_id": new_message_id, - "new_branch_id": new_branch_id, - } - - @self.router.patch("/messages/{message_id}/metadata") - @self.base_endpoint - async def update_message_metadata( - message_id: str = Path(..., description="Message ID"), - metadata: dict = Body(..., description="Metadata to update"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ): - """Update metadata for a specific message. 
- - The provided metadata will be merged with existing metadata. - New keys will be added, existing keys will be updated. - """ - await self.service.update_message_metadata(message_id, metadata) - return "ok" - - @self.router.get("/export/messages") - @self.base_endpoint - async def export_messages( - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ): - if not auth_user.is_superuser: - raise R2RException( - "Only an authorized user can call the `export/messages` endpoint.", - 403, - ) - return await self.service.export_messages_to_csv( - return_type="stream" - ) - - @self.router.get("/branches_overview/{conversation_id}") - @self.base_endpoint - async def branches_overview( - conversation_id: str = Path(..., description="Conversation ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> dict: - if not auth_user.is_superuser: - has_access = await self.service.verify_conversation_access( - conversation_id, auth_user.id - ) - if not has_access: - raise R2RException( - message="You do not have access to this conversation's branches", - status_code=403, - ) - branches = await self.service.branches_overview(conversation_id) - return {"branches": branches} - - # TODO: Publish this endpoint once more testing is done - # @self.router.get("/get_next_branch/{branch_id}") - # @self.base_endpoint - # async def get_next_branch( - # branch_id: str = Path(..., description="Current branch ID"), - # auth_user=Depends(self.service.providers.auth.auth_wrapper), - # ) -> dict: - # next_branch_id = await self.service.get_next_branch(branch_id) - # return {"next_branch_id": next_branch_id} - - # TODO: Publish this endpoint once more testing is done - # @self.router.get("/get_previous_branch/{branch_id}") - # @self.base_endpoint - # async def get_prev_branch( - # branch_id: str = Path(..., description="Current branch ID"), - # auth_user=Depends(self.service.providers.auth.auth_wrapper), - # ) -> dict: - # prev_branch_id = await self.service.get_prev_branch(branch_id) - # return {"prev_branch_id": prev_branch_id} - - # TODO: Publish this endpoint once more testing is done - # @self.router.post("/branch_at_message/{message_id}") - # @self.base_endpoint - # async def branch_at_message( - # message_id: str = Path(..., description="Message ID"), - # auth_user=Depends(self.service.providers.auth.auth_wrapper), - # ) -> dict: - # branch_id = await self.service.branch_at_message(message_id) - # return {"branch_id": branch_id} - - @self.router.delete("/delete_conversation/{conversation_id}") - @self.base_endpoint - async def delete_conversation( - conversation_id: str = Path(..., description="Conversation ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDeleteResponse: - await self.service.delete_conversation(conversation_id) - return None # type: ignore diff --git a/py/core/main/api/retrieval_router.py b/py/core/main/api/retrieval_router.py deleted file mode 100644 index ae45d62bd..000000000 --- a/py/core/main/api/retrieval_router.py +++ /dev/null @@ -1,389 +0,0 @@ -import asyncio -from pathlib import Path -from typing import Any, Optional, Union -from uuid import UUID - -import yaml -from fastapi import Body, Depends -from fastapi.responses import StreamingResponse - -from core.base import ( - GenerationConfig, - KGSearchSettings, - Message, - R2RException, - SearchSettings, -) -from core.base.api.models import ( - WrappedCompletionResponse, - WrappedDocumentSearchResponse, - WrappedRAGAgentResponse, - WrappedRAGResponse, - WrappedSearchResponse, -) 
-from core.base.logger.base import RunType -from core.providers import ( - HatchetOrchestrationProvider, - SimpleOrchestrationProvider, -) - -from ..services.retrieval_service import RetrievalService -from .base_router import BaseRouter - - -class RetrievalRouter(BaseRouter): - def __init__( - self, - service: RetrievalService, - orchestration_provider: Union[ - HatchetOrchestrationProvider, SimpleOrchestrationProvider - ], - run_type: RunType = RunType.RETRIEVAL, - ): - super().__init__(service, orchestration_provider, run_type) - self.service: RetrievalService = service # for type hinting - - def _load_openapi_extras(self): - yaml_path = ( - Path(__file__).parent / "data" / "retrieval_router_openapi.yml" - ) - with open(yaml_path, "r") as yaml_file: - yaml_content = yaml.safe_load(yaml_file) - return yaml_content - - def _register_workflows(self): - pass - - def _select_filters( - self, - auth_user: Any, - search_settings: Union[SearchSettings, KGSearchSettings], - ) -> dict[str, Any]: - selected_collections = { - str(cid) for cid in set(search_settings.selected_collection_ids) - } - - if auth_user.is_superuser: - if selected_collections: - # For superusers, we only filter by selected collections - filters = { - "collection_ids": {"$overlap": list(selected_collections)} - } - else: - filters = {} - else: - user_collections = set(auth_user.collection_ids) - - if selected_collections: - allowed_collections = user_collections.intersection( - selected_collections - ) - else: - allowed_collections = user_collections - # for non-superusers, we filter by user_id and selected & allowed collections - filters = { - "$or": [ - {"user_id": {"$eq": auth_user.id}}, - { - "collection_ids": { - "$overlap": list(allowed_collections) - } - }, - ] # type: ignore - } - - if search_settings.filters != {}: - filters = {"$and": [filters, search_settings.filters]} # type: ignore - - return filters - - def _setup_routes(self): - search_extras = self.openapi_extras.get("search", {}) - search_descriptions = search_extras.get("input_descriptions", {}) - - @self.router.post( - "/search_documents", - openapi_extra=search_extras.get("openapi_extra"), - ) - @self.base_endpoint - async def search_documents( - query: str = Body( - ..., description=search_descriptions.get("query") - ), - settings: SearchSettings = Body( - default_factory=SearchSettings, - description="Settings for document search", - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDocumentSearchResponse: # type: ignore - """ - Perform a search query on the vector database and knowledge graph. - - This endpoint allows for complex filtering of search results using PostgreSQL-based queries. - Filters can be applied to various fields such as document_id, and internal metadata values. - - - Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. 
- """ - - query_embedding = ( - await self.service.providers.embedding.async_get_embedding( - query - ) - ) - results = await self.service.search_documents( - query=query, - query_embedding=query_embedding, - settings=settings, - ) - return results - - @self.router.post( - "/search", - openapi_extra=search_extras.get("openapi_extra"), - ) - @self.base_endpoint - async def search_app( - query: str = Body( - ..., description=search_descriptions.get("query") - ), - vector_search_settings: SearchSettings = Body( - default_factory=SearchSettings, - description=search_descriptions.get("vector_search_settings"), - ), - kg_search_settings: KGSearchSettings = Body( - default_factory=KGSearchSettings, - description=search_descriptions.get("kg_search_settings"), - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedSearchResponse: # type: ignore - """ - Perform a search query on the vector database and knowledge graph. - - This endpoint allows for complex filtering of search results using PostgreSQL-based queries. - Filters can be applied to various fields such as document_id, and internal metadata values. - - - Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. - """ - - vector_search_settings.filters = self._select_filters( - auth_user, vector_search_settings - ) - - kg_search_settings.filters = self._select_filters( - auth_user, kg_search_settings - ) - - results = await self.service.search( - query=query, - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, - ) - return results - - rag_extras = self.openapi_extras.get("rag", {}) - rag_descriptions = rag_extras.get("input_descriptions", {}) - - @self.router.post( - "/rag", - openapi_extra=rag_extras.get("openapi_extra"), - ) - @self.base_endpoint - async def rag_app( - query: str = Body(..., description=rag_descriptions.get("query")), - vector_search_settings: SearchSettings = Body( - default_factory=SearchSettings, - description=rag_descriptions.get("vector_search_settings"), - ), - kg_search_settings: KGSearchSettings = Body( - default_factory=KGSearchSettings, - description=rag_descriptions.get("kg_search_settings"), - ), - rag_generation_config: GenerationConfig = Body( - default_factory=GenerationConfig, - description=rag_descriptions.get("rag_generation_config"), - ), - task_prompt_override: Optional[str] = Body( - None, description=rag_descriptions.get("task_prompt_override") - ), - include_title_if_available: bool = Body( - False, - description=rag_descriptions.get("include_title_if_available"), - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedRAGResponse: # type: ignore - """ - Execute a RAG (Retrieval-Augmented Generation) query. - - This endpoint combines search results with language model generation. - It supports the same filtering capabilities as the search endpoint, - allowing for precise control over the retrieved context. - - The generation process can be customized using the rag_generation_config parameter. 
- """ - - vector_search_settings.filters = self._select_filters( - auth_user, vector_search_settings - ) - - response = await self.service.rag( - query=query, - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, - rag_generation_config=rag_generation_config, - task_prompt_override=task_prompt_override, - include_title_if_available=include_title_if_available, - ) - - if rag_generation_config.stream: - - async def stream_generator(): - async for chunk in response: - yield chunk - await asyncio.sleep(0) - - return StreamingResponse( - stream_generator(), media_type="application/json" - ) # type: ignore - else: - return response - - agent_extras = self.openapi_extras.get("agent", {}) - agent_descriptions = agent_extras.get("input_descriptions", {}) - - @self.router.post( - "/agent", - openapi_extra=agent_extras.get("openapi_extra"), - ) - @self.base_endpoint - async def agent_app( - message: Optional[Message] = Body( - None, description=agent_descriptions.get("message") - ), - messages: Optional[list[Message]] = Body( - None, - description=agent_descriptions.get("messages"), - deprecated=True, - ), - vector_search_settings: SearchSettings = Body( - default_factory=SearchSettings, - description=agent_descriptions.get("vector_search_settings"), - ), - kg_search_settings: KGSearchSettings = Body( - default_factory=KGSearchSettings, - description=agent_descriptions.get("kg_search_settings"), - ), - rag_generation_config: GenerationConfig = Body( - default_factory=GenerationConfig, - description=agent_descriptions.get("rag_generation_config"), - ), - task_prompt_override: Optional[str] = Body( - None, - description=agent_descriptions.get("task_prompt_override"), - ), - include_title_if_available: bool = Body( - True, - description=agent_descriptions.get( - "include_title_if_available" - ), - ), - conversation_id: Optional[UUID] = Body( - None, description=agent_descriptions.get("conversation_id") - ), - branch_id: Optional[UUID] = Body( - None, description=agent_descriptions.get("branch_id") - ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedRAGAgentResponse: # type: ignore - """ - Implement an agent-based interaction for complex query processing. - - This endpoint supports multi-turn conversations and can handle complex queries - by breaking them down into sub-tasks. It uses the same filtering capabilities - as the search and RAG endpoints for retrieving relevant information. - - The agent's behavior can be customized using the rag_generation_config and - task_prompt_override parameters. 
- """
- 
- vector_search_settings.filters = self._select_filters(
- auth_user, vector_search_settings
- )
- 
- kg_search_settings.filters = vector_search_settings.filters
- try:
- response = await self.service.agent(
- message=message,
- messages=messages,
- vector_search_settings=vector_search_settings,
- kg_search_settings=kg_search_settings,
- rag_generation_config=rag_generation_config,
- task_prompt_override=task_prompt_override,
- include_title_if_available=include_title_if_available,
- conversation_id=(
- str(conversation_id) if conversation_id else None
- ),
- branch_id=str(branch_id) if branch_id else None,
- )
- 
- if rag_generation_config.stream:
- 
- async def stream_generator():
- content = ""
- async for chunk in response:
- yield chunk
- content += chunk
- await asyncio.sleep(0)
- 
- return StreamingResponse(
- stream_generator(), media_type="application/json"
- ) # type: ignore
- else:
- return response
- except Exception as e:
- raise R2RException(str(e), 500)
- 
- @self.router.post("/completion")
- @self.base_endpoint
- async def completion(
- messages: list[Message] = Body(
- ..., description="The messages to complete"
- ),
- generation_config: GenerationConfig = Body(
- default_factory=GenerationConfig,
- description="The generation config",
- ),
- auth_user=Depends(self.service.providers.auth.auth_wrapper),
- response_model=WrappedCompletionResponse,
- ):
- """
- Generate completions for a list of messages.
- 
- This endpoint uses the language model to generate completions for the provided messages.
- The generation process can be customized using the generation_config parameter.
- """
- print("messages = ", messages)
- 
- return await self.service.completion(
- messages=[message.to_dict() for message in messages],
- generation_config=generation_config,
- )
- 
- @self.router.post("/embedding")
- @self.base_endpoint
- async def embedding(
- content: str = Body(..., description="The content to embed"),
- auth_user=Depends(self.service.providers.auth.auth_wrapper),
- response_model=WrappedCompletionResponse,
- ):
- """
- Generate an embedding for the provided content.
- 
- This endpoint uses the embedding provider to generate an embedding for the provided text content.
- """ - - return await self.service.providers.embedding.async_get_embedding( - text=content - ) diff --git a/py/core/main/api/auth_router.py b/py/core/main/api/v3/auth_router.py similarity index 83% rename from py/core/main/api/auth_router.py rename to py/core/main/api/v3/auth_router.py index 8bcbe3c48..9095c789c 100644 --- a/py/core/main/api/auth_router.py +++ b/py/core/main/api/v3/auth_router.py @@ -19,24 +19,26 @@ SimpleOrchestrationProvider, ) -from ...base.logger.base import RunType -from ..services.auth_service import AuthService -from .base_router import BaseRouter +from ....base.logger.base import RunType +from ...services.auth_service import AuthService +from .base_router import BaseRouterV3 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") -class AuthRouter(BaseRouter): +class AuthRouter(BaseRouterV3): def __init__( self, - service: AuthService, + providers, + services, orchestration_provider: Union[ HatchetOrchestrationProvider, SimpleOrchestrationProvider ], run_type: RunType = RunType.UNSPECIFIED, ): - super().__init__(service, orchestration_provider, run_type) - self.service: AuthService = service # for type hinting + super().__init__(providers, services, orchestration_provider, run_type) + + self.services = services # for type hinting def _setup_routes(self): @self.router.post("/register", response_model=WrappedUserResponse) @@ -48,7 +50,7 @@ async def register_app( """ Register a new user with the given email and password. """ - result = await self.service.register(email, password) + result = await self.services["auth"].register(email, password) return result @self.router.post( @@ -67,7 +69,9 @@ async def verify_email_app( This endpoint is used to confirm a user's email address using the verification code sent to their email after registration. """ - result = await self.service.verify_email(email, verification_code) + result = await self.services["auth"].verify_email( + email, verification_code + ) return GenericMessageResponse(message=result["message"]) @self.router.post("/login", response_model=WrappedTokenResponse) @@ -81,7 +85,7 @@ async def login_app( This endpoint authenticates a user using their email (username) and password, and returns access and refresh tokens upon successful authentication. """ - login_result = await self.service.login( + login_result = await self.services["auth"].login( form_data.username, form_data.password ) return login_result @@ -92,20 +96,24 @@ async def login_app( @self.base_endpoint async def logout_app( token: str = Depends(oauth2_scheme), - auth_user=Depends(self.service.providers.auth.auth_wrapper), + auth_user=Depends( + self.services["auth"].providers.auth.auth_wrapper + ), ): """ Log out the current user. This endpoint invalidates the user's current access token, effectively logging them out. """ - result = await self.service.logout(token) + result = await self.services["auth"].logout(token) return GenericMessageResponse(message=result["message"]) @self.router.get("/user", response_model=WrappedUserResponse) @self.base_endpoint async def get_user_app( - auth_user=Depends(self.service.providers.auth.auth_wrapper), + auth_user=Depends( + self.services["auth"].providers.auth.auth_wrapper + ), ): """ Get the current user's profile information. 
@@ -129,7 +137,9 @@ async def put_user_app( profile_picture: str | None = Body( None, description="Updated profile picture URL" ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), + auth_user=Depends( + self.services["auth"].providers.auth.auth_wrapper + ), ): """ Update the current user's profile information. @@ -149,7 +159,7 @@ async def put_user_app( 403, ) - return await self.service.update_user( + return await self.services["auth"].update_user( user_id=user_id, email=email, is_superuser=is_superuser, @@ -170,7 +180,7 @@ async def refresh_access_token_app( This endpoint allows users to obtain a new access token using their refresh token. """ - refresh_result = await self.service.refresh_access_token( + refresh_result = await self.services["auth"].refresh_access_token( refresh_token=refresh_token, ) return refresh_result @@ -182,7 +192,9 @@ async def refresh_access_token_app( async def change_password_app( current_password: str = Body(..., description="Current password"), new_password: str = Body(..., description="New password"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), + auth_user=Depends( + self.services["auth"].providers.auth.auth_wrapper + ), ): """ Change the authenticated user's password. @@ -190,7 +202,7 @@ async def change_password_app( This endpoint allows users to change their password by providing their current password and a new password. """ - result = await self.service.change_password( + result = await self.services["auth"].change_password( auth_user, current_password, new_password, @@ -211,7 +223,7 @@ async def request_password_reset_app( This endpoint initiates the password reset process by sending a reset link to the specified email address. """ - result = await self.service.request_password_reset(email) + result = await self.services["auth"].request_password_reset(email) return GenericMessageResponse(message=result["message"]) @self.router.post( @@ -223,7 +235,7 @@ async def reset_password_app( reset_token: str = Body(..., description="Password reset token"), new_password: str = Body(..., description="New password"), ): - result = await self.service.confirm_password_reset( + result = await self.services["auth"].confirm_password_reset( reset_token, new_password ) return GenericMessageResponse(message=result["message"]) @@ -241,7 +253,9 @@ async def delete_user_app( False, description="Whether to delete the user's vector data", ), - auth_user=Depends(self.service.providers.auth.auth_wrapper), + auth_user=Depends( + self.services["auth"].providers.auth.auth_wrapper + ), ): """ Delete a user account. @@ -254,7 +268,7 @@ async def delete_user_app( if not auth_user.is_superuser and not password: raise Exception("Password is required for non-superusers") user_uuid = UUID(user_id) - result = await self.service.delete_user( + result = await self.services["auth"].delete_user( user_uuid, password, delete_vector_data ) return GenericMessageResponse(message=result["message"]) @@ -263,7 +277,9 @@ async def delete_user_app( @self.base_endpoint async def get_user_verification_code( user_id: str = Path(..., description="User ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), + auth_user=Depends( + self.services["auth"].providers.auth.auth_wrapper + ), ) -> WrappedVerificationResult: """ Get only the verification code for a specific user. 
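The constructor change above (a `providers` object plus a `services` mapping instead of a single service) applies to all of the v3 routers in this diff. A minimal wiring sketch follows; the builder function and service objects are illustrative names, not code from this change, and the real assembly happens in the application factory, which is outside this diff.

```
# Illustrative wiring only: the dict keys mirror the self.services["..."] lookups
# used by the v3 routers ("auth", "ingestion", "management", "retrieval", "kg").
def build_v3_auth_router(providers, auth_service, ingestion_service,
                         management_service, retrieval_service, kg_service,
                         orchestration_provider):
    from core.main.api.v3.auth_router import AuthRouter

    services = {
        "auth": auth_service,
        "ingestion": ingestion_service,
        "management": management_service,
        "retrieval": retrieval_service,
        "kg": kg_service,
    }
    return AuthRouter(providers, services, orchestration_provider)
```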
@@ -281,14 +297,18 @@ async def get_user_verification_code( raise R2RException( status_code=400, message="Invalid user ID format" ) - result = await self.service.get_user_verification_code(user_uuid) + result = await self.services["auth"].get_user_verification_code( + user_uuid + ) return result @self.router.get("/user/{user_id}/reset_token") @self.base_endpoint async def get_user_reset_token( user_id: str = Path(..., description="User ID"), - auth_user=Depends(self.service.providers.auth.auth_wrapper), + auth_user=Depends( + self.services["auth"].providers.auth.auth_wrapper + ), ) -> WrappedResetDataResult: """ Get only the verification code for a specific user. @@ -306,7 +326,9 @@ async def get_user_reset_token( raise R2RException( status_code=400, message="Invalid user ID format" ) - result = await self.service.get_user_reset_token(user_uuid) + result = await self.services["auth"].get_user_reset_token( + user_uuid + ) if not result["reset_token"]: raise R2RException( status_code=404, message="No reset token found" @@ -328,5 +350,5 @@ async def send_reset_email_app( This endpoint is particularly useful for sandbox/testing environments where direct access to verification codes is needed. """ - result = await self.service.send_reset_email(email) + result = await self.services["auth"].send_reset_email(email) return result diff --git a/py/core/main/api/base_router.py b/py/core/main/api/v3/base_router.py similarity index 78% rename from py/core/main/api/base_router.py rename to py/core/main/api/v3/base_router.py index afdeda8ff..b0c59b809 100644 --- a/py/core/main/api/base_router.py +++ b/py/core/main/api/v3/base_router.py @@ -1,33 +1,20 @@ import functools import logging from abc import abstractmethod -from typing import Callable, Union +from typing import Callable from fastapi import APIRouter, HTTPException from fastapi.responses import StreamingResponse from core.base import R2RException, manage_run -from core.base.logger.base import RunType -from core.providers import ( - HatchetOrchestrationProvider, - SimpleOrchestrationProvider, -) - -from ..services.base import Service logger = logging.getLogger() -class BaseRouter: - def __init__( - self, - service: "Service", - orchestration_provider: Union[ - HatchetOrchestrationProvider, SimpleOrchestrationProvider - ], - run_type: RunType = RunType.UNSPECIFIED, - ): - self.service = service +class BaseRouterV3: + def __init__(self, providers, services, orchestration_provider, run_type): + self.providers = providers + self.services = services self.run_type = run_type self.orchestration_provider = orchestration_provider self.router = APIRouter() @@ -42,11 +29,13 @@ def base_endpoint(self, func: Callable): @functools.wraps(func) async def wrapper(*args, **kwargs): async with manage_run( - self.service.run_manager, func.__name__ + self.services["ingestion"].run_manager, func.__name__ ) as run_id: auth_user = kwargs.get("auth_user") if auth_user: - await self.service.run_manager.log_run_info( + await self.services[ + "ingestion" + ].run_manager.log_run_info( # TODO - this is a bit of a hack run_type=self.run_type, user=auth_user, ) @@ -70,7 +59,7 @@ async def wrapper(*args, **kwargs): except Exception as e: - await self.service.logging_connection.log( + await self.services["ingestion"].logging_connection.log( run_id=run_id, key="error", value=str(e), diff --git a/py/core/main/api/v3/chunks_router.py b/py/core/main/api/v3/chunks_router.py new file mode 100644 index 000000000..6031e3c6c --- /dev/null +++ b/py/core/main/api/v3/chunks_router.py @@ -0,0 
+1,524 @@ +import json +import logging +import textwrap +from copy import copy +from typing import Any, Optional +from uuid import UUID + +from fastapi import Body, Depends, Path, Query + +from core.base import ( + ChunkResponse, + ChunkSearchSettings, + GraphSearchSettings, + R2RException, + RunType, + SearchSettings, + UnprocessedChunk, + UpdateChunk, +) +from core.base.api.models import ( + GenericBooleanResponse, + WrappedBooleanResponse, + WrappedChunkResponse, + WrappedChunksResponse, + WrappedVectorSearchResponse, +) +from core.providers import ( + HatchetOrchestrationProvider, + SimpleOrchestrationProvider, +) +from core.utils import generate_id + +from .base_router import BaseRouterV3 + +logger = logging.getLogger() + +MAX_CHUNKS_PER_REQUEST = 1024 * 100 + + +class ChunksRouter(BaseRouterV3): + def __init__( + self, + providers, + services, + orchestration_provider: ( + HatchetOrchestrationProvider | SimpleOrchestrationProvider + ), + run_type: RunType = RunType.INGESTION, + ): + super().__init__(providers, services, orchestration_provider, run_type) + + def _select_filters( + self, + auth_user: Any, + search_settings: SearchSettings, + ) -> dict[str, Any]: + + filters = copy(search_settings.filters) + selected_collections = None + if not auth_user.is_superuser: + user_collections = set(auth_user.collection_ids) + for key in filters.keys(): + if "collection_ids" in key: + selected_collections = set(filters[key]["$overlap"]) + break + + if selected_collections: + allowed_collections = user_collections.intersection( + selected_collections + ) + else: + allowed_collections = user_collections + # for non-superusers, we filter by user_id and selected & allowed collections + collection_filters = { + "$or": [ + {"user_id": {"$eq": auth_user.id}}, + { + "collection_ids": { + "$overlap": list(allowed_collections) + } + }, + ] # type: ignore + } + + filters.pop("collection_ids", None) + + filters = {"$and": [collection_filters, filters]} # type: ignore + + return filters + + def _setup_routes(self): + @self.router.post( + "/chunks/search", + summary="Search Chunks", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + response = client.chunks.search( + query="search query", + search_settings={ + "limit": 10 + } + ) + """ + ), + } + ] + }, + ) + @self.base_endpoint + async def search_chunks( + query: str = Body(...), + search_settings: SearchSettings = Body( + default_factory=SearchSettings, + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedVectorSearchResponse: # type: ignore + # TODO - Deduplicate this code by sharing the code on the retrieval router + """ + Perform a semantic search query over all stored chunks. + + This endpoint allows for complex filtering of search results using PostgreSQL-based queries. + Filters can be applied to various fields such as document_id, and internal metadata values. + + Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. 
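The filter grammar referenced here is the same dict-based syntax that `_select_filters` builds internally: operators are written as `$`-prefixed keys nested under a field name, and clauses combine with `$and` / `$or`. For non-superusers the handler also intersects any requested `collection_ids` with the caller's own collections and adds an owner check, so results stay scoped to what the user can access. A hedged example of passing filters through `search_settings` (the `document_id` field is named in the docstring; the specific IDs are placeholders):

```
# Sketch of the filter syntax: $-prefixed operators nested under field names,
# combined with $and. The UUID values are illustrative placeholders.
from r2r import R2RClient

client = R2RClient("http://localhost:7272")
response = client.chunks.search(
    query="search query",
    search_settings={
        "limit": 10,
        "filters": {
            "$and": [
                {"document_id": {"$eq": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1"}},
                {"collection_ids": {"$overlap": ["122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"]}},
            ]
        },
    },
)
```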
+ """ + + search_settings.filters = self._select_filters( + auth_user, search_settings + ) + + search_settings.graph_settings = GraphSearchSettings(enabled=False) + + results = await self.services["retrieval"].search( + query=query, + search_settings=search_settings, + ) + return results["chunk_search_results"] + + @self.router.get( + "/chunks/{id}", + summary="Retrieve Chunk", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + response = client.chunks.retrieve( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.chunks.retrieve({ + id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def retrieve_chunk( + id: UUID = Path(...), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedChunkResponse: + """ + Get a specific chunk by its ID. + + Returns the chunk's content, metadata, and associated document/collection information. + Users can only retrieve chunks they own or have access to through collections. + """ + chunk = await self.services["ingestion"].get_chunk(id) + if not chunk: + raise R2RException("Chunk not found", 404) + + # # Check access rights + # document = await self.services["management"].get_document(chunk.document_id) + # TODO - Add collection ID check + if not auth_user.is_superuser and str(auth_user.id) != str( + chunk["user_id"] + ): + raise R2RException("Not authorized to access this chunk", 403) + + return ChunkResponse( # type: ignore + id=chunk["id"], + document_id=chunk["document_id"], + owner_id=chunk["owner_id"], + collection_ids=chunk["collection_ids"], + text=chunk["text"], + metadata=chunk["metadata"], + # vector = chunk["vector"] # TODO - Add include vector flag + ) + + @self.router.post( + "/chunks/{id}", + summary="Update Chunk", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + response = client.chunks.update( + { + "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "text": "Updated content", + "metadata": {"key": "new value"} + } + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.chunks.update({ + id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + text: "Updated content", + metadata: {key: "new value"} + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def update_chunk( + id: UUID = Path(...), + chunk_update: UpdateChunk = Body(...), + # TODO: Run with orchestration? + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedChunkResponse: + """ + Update an existing chunk's content and/or metadata. + + The chunk's vectors will be automatically recomputed based on the new content. + Users can only update chunks they own unless they are superusers. 
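As the handler below shows, an omitted `metadata` field falls back to the chunk's existing metadata, so a text-only update preserves metadata while the embedding is regenerated from the new text. A small sketch (the chunk ID is a placeholder, and leaving `metadata` out assumes `UpdateChunk.metadata` is optional):

```
# Text-only update: metadata is omitted, so the existing metadata is kept
# (see the `chunk_update.metadata or existing_chunk["metadata"]` fallback below).
from r2r import R2RClient

client = R2RClient("http://localhost:7272")
response = client.chunks.update(
    {
        "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
        "text": "Updated content; prior metadata is preserved",
    }
)
```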
+ """ + # Get the existing chunk to get its chunk_id + existing_chunk = await self.services["ingestion"].get_chunk( + chunk_update.id + ) + if existing_chunk is None: + raise R2RException(f"Chunk {chunk_update.id} not found", 404) + + workflow_input = { + "document_id": str(existing_chunk["document_id"]), + "id": str(chunk_update.id), + "text": chunk_update.text, + "metadata": chunk_update.metadata + or existing_chunk["metadata"], + "user": auth_user.model_dump_json(), + } + + logger.info("Running chunk ingestion without orchestration.") + from core.main.orchestration import simple_ingestion_factory + + # TODO - CLEAN THIS UP + + simple_ingestor = simple_ingestion_factory( + self.services["ingestion"] + ) + await simple_ingestor["update-chunk"](workflow_input) + + return ChunkResponse( # type: ignore + id=chunk_update.id, + document_id=existing_chunk["document_id"], + owner_id=existing_chunk["owner_id"], + collection_ids=existing_chunk["collection_ids"], + text=chunk_update.text, + metadata=chunk_update.metadata or existing_chunk["metadata"], + # vector = existing_chunk.get('vector') + ) + + # @self.router.post( + # "/chunks/{id}/enrich", + # summary="Enrich Chunk", + # openapi_extra={ + # "x-codeSamples": [ + # { + # "lang": "Python", + # "source": """ + # from r2r import R2RClient + + # client = R2RClient("http://localhost:7272") + # result = client.chunks.enrich( + # id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + # enrichment_config={"key": "value"} + # ) + # """, + # } + # ] + # }, + # ) + # @self.base_endpoint + # async def enrich_chunk( + # id: UUID = Path(...), + # enrichment_config: dict = Body(...), + # auth_user=Depends(self.providers.auth.auth_wrapper), + # ) -> ResultsWrapper[ChunkResponse]: + # """ + # Enrich a chunk with additional processing and metadata. + + # This endpoint allows adding additional enrichments to an existing chunk, + # such as entity extraction, classification, or custom processing defined + # in the enrichment_config. + + # Users can only enrich chunks they own unless they are superusers. + # """ + # pass + + @self.router.delete( + "/chunks/{id}", + summary="Delete Chunk", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + response = client.chunks.delete( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.chunks.delete({ + id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def delete_chunk( + id: UUID = Path(...), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Delete a specific chunk by ID. + + This permanently removes the chunk and its associated vector embeddings. + The parent document remains unchanged. Users can only delete chunks they + own unless they are superusers. 
+ """ + # Get the existing chunk to get its chunk_id + existing_chunk = await self.services["ingestion"].get_chunk(id) + if existing_chunk is None: + raise R2RException( + message=f"Chunk {id} not found", status_code=404 + ) + + filters = { + "$and": [ + {"id": {"$eq": str(auth_user.id)}}, + {"id": {"$eq": id}}, + ] + } + await self.services["management"].delete(filters=filters) + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.get( + "/chunks", + summary="List Chunks", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + response = client.chunks.list( + metadata_filter={"key": "value"}, + include_vectors=False, + offset=0, + limit=10, + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.chunks.list({ + metadataFilter: {key: "value"}, + includeVectors: false, + offset: 0, + limit: 10, + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def list_chunks( + metadata_filter: Optional[str] = Query( + None, description="Filter by metadata" + ), + include_vectors: bool = Query( + False, description="Include vector data in response" + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedChunksResponse: + """ + List chunks with pagination support. + + Returns a paginated list of chunks that the user has access to. + Results can be filtered and sorted based on various parameters. + Vector embeddings are only included if specifically requested. + + Regular users can only list chunks they own or have access to through + collections. Superusers can list all chunks in the system. 
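Note that `metadata_filter` is a query-string parameter parsed with `json.loads` in the handler below, so raw HTTP callers must send a JSON-encoded string; the SDK sample above accepts a dict and serializes it. A hedged sketch of the raw request (the `/v3` prefix and the auth header are assumptions):

```
# Raw HTTP sketch: metadata_filter travels as a JSON string in the query string.
import json
import requests

params = {
    "metadata_filter": json.dumps({"key": "value"}),
    "include_vectors": "false",
    "offset": 0,
    "limit": 10,
}
resp = requests.get(
    "http://localhost:7272/v3/chunks",
    params=params,
    headers={"Authorization": "Bearer YOUR_API_KEY"},  # illustrative auth header
)
print(resp.json())
```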
+ """ # Build filters + filters = {} + + # Add user access control filter + if not auth_user.is_superuser: + filters["user_id"] = {"$eq": str(auth_user.id)} + + # Add metadata filters if provided + if metadata_filter: + metadata_filter = json.loads(metadata_filter) + + # Get chunks using the vector handler's list_chunks method + results = await self.services["ingestion"].list_chunks( + filters=filters, + include_vectors=include_vectors, + offset=offset, + limit=limit, + ) + + # Convert to response format + chunks = [ + ChunkResponse( + id=chunk["id"], + document_id=chunk["document_id"], + owner_id=chunk["owner_id"], + collection_ids=chunk["collection_ids"], + text=chunk["text"], + metadata=chunk["metadata"], + vector=chunk.get("vector") if include_vectors else None, + ) + for chunk in results["results"] + ] + + return (chunks, results["page_info"]) # type: ignore diff --git a/py/core/main/api/v3/collections_router.py b/py/core/main/api/v3/collections_router.py new file mode 100644 index 000000000..45488f243 --- /dev/null +++ b/py/core/main/api/v3/collections_router.py @@ -0,0 +1,1128 @@ +import logging +import textwrap +from typing import Optional +from uuid import UUID + +from fastapi import Body, Depends, Path, Query + +from core.base import KGCreationSettings, KGRunType, R2RException, RunType +from core.base.api.models import ( + GenericBooleanResponse, + WrappedBooleanResponse, + WrappedCollectionResponse, + WrappedCollectionsResponse, + WrappedDocumentsResponse, + WrappedGenericMessageResponse, + WrappedUsersResponse, +) +from core.providers import ( + HatchetOrchestrationProvider, + SimpleOrchestrationProvider, +) +from core.utils import ( + generate_default_user_collection_id, + update_settings_from_dict, +) + +from .base_router import BaseRouterV3 + +logger = logging.getLogger() + + +class CollectionsRouter(BaseRouterV3): + def __init__( + self, + providers, + services, + orchestration_provider: ( + HatchetOrchestrationProvider | SimpleOrchestrationProvider + ), + run_type: RunType = RunType.MANAGEMENT, + ): + super().__init__(providers, services, orchestration_provider, run_type) + + def _setup_routes(self): + @self.router.post( + "/collections", + summary="Create a new collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + result = client.collections.create( + name="My New Collection", + description="This is a sample collection" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.create({ + name: "My New Collection", + description: "This is a sample collection" + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r collections create "My New Collection" --description="This is a sample collection" + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/collections" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -d '{"name": "My New Collection", "description": "This is a sample collection"}' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def create_collection( + name: str = Body(..., description="The name of the collection"), + description: Optional[str] = Body( + None, description="An optional description of the collection" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedCollectionResponse: + """ + Create a new collection and automatically add the creating user to it. + + This endpoint allows authenticated users to create a new collection with a specified name + and optional description. The user creating the collection is automatically added as a member. + """ + collection = await self.services["management"].create_collection( + owner_id=auth_user.id, + name=name, + description=description, + ) + # Add the creating user to the collection + await self.services["management"].add_user_to_collection( + auth_user.id, collection.id + ) + return collection + + @self.router.get( + "/collections", + summary="List collections", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.list( + offset=0, + limit=10, + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.list(); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r collections list + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/collections?offset=0&limit=10&name=Sample" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def list_collections( + ids: list[str] = Query( + [], + description="A list of collection IDs to retrieve. If not provided, all collections will be returned.", + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedCollectionsResponse: + """ + Returns a paginated list of collections the authenticated user has access to. + + Results can be filtered by providing specific collection IDs. 
Regular users will only see + collections they own or have access to. Superusers can see all collections. + + The collections are returned in order of last modification, with most recent first. + """ + requesting_user_id = ( + None if auth_user.is_superuser else [auth_user.id] + ) + + collection_uuids = [UUID(collection_id) for collection_id in ids] + + collections_overview_response = await self.services[ + "management" + ].collections_overview( + user_ids=requesting_user_id, + collection_ids=collection_uuids, + offset=offset, + limit=limit, + ) + + return ( # type: ignore + collections_overview_response["results"], + { + "total_entries": collections_overview_response[ + "total_entries" + ] + }, + ) + + @self.router.get( + "/collections/{id}", + summary="Get collection details", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.retrieve("123e4567-e89b-12d3-a456-426614174000") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.retrieve({id: "123e4567-e89b-12d3-a456-426614174000"}); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r collections retrieve 123e4567-e89b-12d3-a456-426614174000 + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_collection( + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedCollectionResponse: + """ + Get details of a specific collection. + + This endpoint retrieves detailed information about a single collection identified by its UUID. + The user must have access to the collection to view its details. + """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + collections_overview_response = await self.services[ + "management" + ].collections_overview( + user_ids=None, + collection_ids=[id], + offset=0, + limit=1, + ) + return collections_overview_response["results"][0] + + @self.router.post( + "/collections/{id}", + summary="Update collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + result = client.collections.update( + "123e4567-e89b-12d3-a456-426614174000", + name="Updated Collection Name", + description="Updated description" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.update({ + id: "123e4567-e89b-12d3-a456-426614174000", + name: "Updated Collection Name", + description: "Updated description" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -d '{"name": "Updated Collection Name", "description": "Updated description"}' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def update_collection( + id: UUID = Path( + ..., + description="The unique identifier of the collection to update", + ), + name: Optional[str] = Body( + None, description="The name of the collection" + ), + description: Optional[str] = Body( + None, description="An optional description of the collection" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedCollectionResponse: + """ + Update an existing collection's configuration. + + This endpoint allows updating the name and description of an existing collection. + The user must have appropriate permissions to modify the collection. + """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + return await self.services["management"].update_collection( # type: ignore + id, + name=name, + description=description, + ) + + @self.router.delete( + "/collections/{id}", + summary="Delete collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.delete("123e4567-e89b-12d3-a456-426614174000") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.delete({id: "123e4567-e89b-12d3-a456-426614174000"}); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r collections delete 123e4567-e89b-12d3-a456-426614174000 + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def delete_collection( + id: UUID = Path( + ..., + description="The unique identifier of the collection to delete", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Delete an existing collection. + + This endpoint allows deletion of a collection identified by its UUID. + The user must have appropriate permissions to delete the collection. + Deleting a collection removes all associations but does not delete the documents within it. 
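Because deletion only removes the collection and its membership links, documents that were added to it remain ingested and searchable afterward. A short hedged walkthrough combining the calls documented above (all IDs are placeholders for existing resources):

```
# Walkthrough with placeholder IDs: deleting the collection removes the
# membership link, not the documents themselves.
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

collection_id = "123e4567-e89b-12d3-a456-426614174000"  # an existing collection
document_id = "456e789a-b12c-34d5-e678-901234567890"    # an already-ingested document

client.collections.add_document(collection_id, document_id)
client.collections.delete(collection_id)
# The document is still present and searchable; only its association with the
# deleted collection is gone.
```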
+ """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + await self.services["management"].delete_collection(id) + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.post( + "/collections/{id}/documents/{document_id}", + summary="Add document to collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.add_document( + "123e4567-e89b-12d3-a456-426614174000", + "456e789a-b12c-34d5-e678-901234567890" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.addDocument({ + id: "123e4567-e89b-12d3-a456-426614174000" + documentId: "456e789a-b12c-34d5-e678-901234567890" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/documents/456e789a-b12c-34d5-e678-901234567890" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def add_document_to_collection( + id: UUID = Path(...), + document_id: UUID = Path(...), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedGenericMessageResponse: + """ + Add a document to a collection. + """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + return await self.services[ + "management" + ].assign_document_to_collection(document_id, id) + + @self.router.get( + "/collections/{id}/documents", + summary="List documents in collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.list_documents( + "123e4567-e89b-12d3-a456-426614174000", + offset=0, + limit=10, + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.listDocuments({id: "123e4567-e89b-12d3-a456-426614174000"}); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r collections list-documents 123e4567-e89b-12d3-a456-426614174000 + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/documents?offset=0&limit=10" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_collection_documents( + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. 
Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedDocumentsResponse: + """ + Get all documents in a collection with pagination and sorting options. + + This endpoint retrieves a paginated list of documents associated with a specific collection. + It supports sorting options to customize the order of returned documents. + """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + documents_in_collection_response = await self.services[ + "management" + ].documents_in_collection(id, offset, limit) + + return documents_in_collection_response["results"], { # type: ignore + "total_entries": documents_in_collection_response[ + "total_entries" + ] + } + + @self.router.delete( + "/collections/{id}/documents/{document_id}", + summary="Remove document from collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.remove_document( + "123e4567-e89b-12d3-a456-426614174000", + "456e789a-b12c-34d5-e678-901234567890" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.removeDocument({ + id: "123e4567-e89b-12d3-a456-426614174000" + documentId: "456e789a-b12c-34d5-e678-901234567890" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/documents/456e789a-b12c-34d5-e678-901234567890" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def remove_document_from_collection( + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + document_id: UUID = Path( + ..., + description="The unique identifier of the document to remove", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Remove a document from a collection. + + This endpoint removes the association between a document and a collection. + It does not delete the document itself. The user must have permissions to modify the collection. + """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + await self.services["management"].remove_document_from_collection( + document_id, id + ) + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.get( + "/collections/{id}/users", + summary="List users in collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + result = client.collections.list_users( + "123e4567-e89b-12d3-a456-426614174000", + offset=0, + limit=10, + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.listUsers({ + id: "123e4567-e89b-12d3-a456-426614174000" + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r collections list-users 123e4567-e89b-12d3-a456-426614174000 + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/users?offset=0&limit=10" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_collection_users( + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedUsersResponse: + """ + Get all users in a collection with pagination and sorting options. + + This endpoint retrieves a paginated list of users who have access to a specific collection. + It supports sorting options to customize the order of returned users. + """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + users_in_collection_response = await self.services[ + "management" + ].get_users_in_collection( + collection_id=id, + offset=offset, + limit=min(max(limit, 1), 1000), + ) + + return users_in_collection_response["results"], { # type: ignore + "total_entries": users_in_collection_response["total_entries"] + } + + @self.router.post( + "/collections/{id}/users/{user_id}", + summary="Add user to collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + result = client.collections.add_user( + "123e4567-e89b-12d3-a456-426614174000", + "789a012b-c34d-5e6f-g789-012345678901" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.addUser({ + id: "123e4567-e89b-12d3-a456-426614174000" + userId: "789a012b-c34d-5e6f-g789-012345678901" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/users/789a012b-c34d-5e6f-g789-012345678901" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def add_user_to_collection( + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + user_id: UUID = Path( + ..., description="The unique identifier of the user to add" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Add a user to a collection. + + This endpoint grants a user access to a specific collection. + The authenticated user must have admin permissions for the collection to add new users. + """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + result = await self.services["management"].add_user_to_collection( + user_id, id + ) + return GenericBooleanResponse(success=result) # type: ignore + + @self.router.delete( + "/collections/{id}/users/{user_id}", + summary="Remove user from collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.collections.remove_user( + "123e4567-e89b-12d3-a456-426614174000", + "789a012b-c34d-5e6f-g789-012345678901" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.collections.removeUser({ + id: "123e4567-e89b-12d3-a456-426614174000" + userId: "789a012b-c34d-5e6f-g789-012345678901" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/collections/123e4567-e89b-12d3-a456-426614174000/users/789a012b-c34d-5e6f-g789-012345678901" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def remove_user_from_collection( + id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + user_id: UUID = Path( + ..., description="The unique identifier of the user to remove" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Remove a user from a collection. + + This endpoint revokes a user's access to a specific collection. + The authenticated user must have admin permissions for the collection to remove users. 
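Membership management pairs the two endpoints above: `add_user` grants access, `remove_user` revokes it, and `list_users` inspects current members. A combined sketch built from the SDK calls already shown in the samples (IDs are the same placeholders used there):

```
# Grant, inspect, then revoke collection access (placeholder IDs).
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

collection_id = "123e4567-e89b-12d3-a456-426614174000"
user_id = "789a012b-c34d-5e6f-g789-012345678901"

client.collections.add_user(collection_id, user_id)
members = client.collections.list_users(collection_id, offset=0, limit=10)
client.collections.remove_user(collection_id, user_id)
```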
+ """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + await self.services["management"].remove_user_from_collection( + user_id, id + ) + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.post( + "/collections/{id}/extract", + summary="Extract entities and relationships", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.documents.extract( + id="9fbe403b-c11c-5aae-8ade-ef22980c3ad1" + ) + """ + ), + }, + ], + }, + ) + @self.base_endpoint + async def extract( + id: UUID = Path( + ..., + description="The ID of the document to extract entities and relationships from.", + ), + run_type: KGRunType = Query( + default=KGRunType.RUN, + description="Whether to return an estimate of the creation cost or to actually extract the document.", + ), + settings: Optional[KGCreationSettings] = Body( + default=None, + description="Settings for the entities and relationships extraction process.", + ), + run_with_orchestration: Optional[bool] = Query( + default=True, + description="Whether to run the entities and relationships extraction process with orchestration.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ): + """ + Extracts entities and relationships from a document. + The entities and relationships extraction process involves: + 1. Parsing documents into semantic chunks + 2. Extracting entities and relationships using LLMs + """ + + settings = settings.dict() if settings else None # type: ignore + if not auth_user.is_superuser: + logger.warning("Implement permission checks here.") + + # If no run type is provided, default to estimate + if not run_type: + run_type = KGRunType.ESTIMATE + + # Apply runtime settings overrides + server_graph_creation_settings = ( + self.providers.database.config.graph_creation_settings + ) + + if settings: + server_graph_creation_settings = update_settings_from_dict( + server_settings=server_graph_creation_settings, + settings_dict=settings, # type: ignore + ) + + # If the run type is estimate, return an estimate of the creation cost + # if run_type is KGRunType.ESTIMATE: + # return { # type: ignore + # "message": "Estimate retrieved successfully", + # "task_id": None, + # "id": id, + # "estimate": await self.services[ + # "kg" + # ].get_creation_estimate( + # document_id=id, + # graph_creation_settings=server_graph_creation_settings, + # ), + # } + # else: + # Otherwise, create the graph + if run_with_orchestration: + workflow_input = { + "collection_id": str(id), + "graph_creation_settings": server_graph_creation_settings.model_dump_json(), + "user": auth_user.json(), + } + + return await self.orchestration_provider.run_workflow( # type: ignore + "extract-triples", {"request": workflow_input}, {} + ) + else: + from core.main.orchestration import simple_kg_factory + + logger.info("Running extract-triples without orchestration.") + simple_kg = simple_kg_factory(self.services["kg"]) + await simple_kg["extract-triples"](workflow_input) # type: ignore + return { # type: ignore + "message": "Graph created successfully.", + "task_id": None, + } diff --git a/py/core/main/api/v3/conversations_router.py b/py/core/main/api/v3/conversations_router.py new file mode 100644 index 000000000..a111ea205 --- 
/dev/null +++ b/py/core/main/api/v3/conversations_router.py @@ -0,0 +1,654 @@ +import logging +import textwrap +from typing import Optional +from uuid import UUID + +from fastapi import Body, Depends, Path, Query + +from core.base import Message, RunType +from core.base.api.models import ( + GenericBooleanResponse, + WrappedBooleanResponse, + WrappedBranchesResponse, + WrappedConversationMessagesResponse, + WrappedConversationResponse, + WrappedConversationsResponse, + WrappedMessageResponse, +) +from core.providers import ( + HatchetOrchestrationProvider, + SimpleOrchestrationProvider, +) + +from .base_router import BaseRouterV3 + +logger = logging.getLogger() + + +class ConversationsRouter(BaseRouterV3): + def __init__( + self, + providers, + services, + orchestration_provider: ( + HatchetOrchestrationProvider | SimpleOrchestrationProvider + ), + run_type: RunType = RunType.MANAGEMENT, + ): + super().__init__(providers, services, orchestration_provider, run_type) + + def _setup_routes(self): + @self.router.post( + "/conversations", + summary="Create a new conversation", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.conversations.create() + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.conversations.create(); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r conversations create + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/conversations" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def create_conversation( + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedConversationResponse: + """ + Create a new conversation. + + This endpoint initializes a new conversation for the authenticated user. + """ + return await self.services["management"].create_conversation() + + @self.router.get( + "/conversations", + summary="List conversations", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.conversations.list( + offset=0, + limit=10, + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.conversations.list(); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r conversations list + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/conversations?offset=0&limit=10" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def list_conversations( + ids: list[str] = Query( + [], + description="A list of conversation IDs to retrieve. If not provided, all conversations will be returned.", + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. 
Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedConversationsResponse: + """ + List conversations with pagination and sorting options. + + This endpoint returns a paginated list of conversations for the authenticated user. + """ + conversation_uuids = [ + UUID(conversation_id) for conversation_id in ids + ] + + conversations_response = await self.services[ + "management" + ].conversations_overview( + conversation_ids=conversation_uuids, + offset=offset, + limit=limit, + ) + return conversations_response["results"], { # type: ignore + "total_entries": conversations_response["total_entries"] + } + + @self.router.get( + "/conversations/{id}", + summary="Get conversation details", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.conversations.get( + "123e4567-e89b-12d3-a456-426614174000" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.conversations.retrieve({ + id: "123e4567-e89b-12d3-a456-426614174000", + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r conversations retrieve 123e4567-e89b-12d3-a456-426614174000 + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/conversations/123e4567-e89b-12d3-a456-426614174000?branch_id=branch_1" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_conversation( + id: UUID = Path( + ..., description="The unique identifier of the conversation" + ), + branch_id: Optional[str] = Query( + None, description="The ID of the specific branch to retrieve" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedConversationMessagesResponse: + """ + Get details of a specific conversation. + + This endpoint retrieves detailed information about a single conversation identified by its UUID. + """ + return await self.services["management"].get_conversation( + str(id), + branch_id, + ) + + @self.router.delete( + "/conversations/{id}", + summary="Delete conversation", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + result = client.conversations.delete("123e4567-e89b-12d3-a456-426614174000") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.conversations.delete({ + id: "123e4567-e89b-12d3-a456-426614174000", + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r conversations delete 123e4567-e89b-12d3-a456-426614174000 + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/conversations/123e4567-e89b-12d3-a456-426614174000" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def delete_conversation( + id: UUID = Path( + ..., + description="The unique identifier of the conversation to delete", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Delete an existing conversation. + + This endpoint deletes a conversation identified by its UUID. + """ + await self.services["management"].delete_conversation(str(id)) + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.post( + "/conversations/{id}/messages", + summary="Add message to conversation", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.conversations.add_message( + "123e4567-e89b-12d3-a456-426614174000", + content="Hello, world!", + role="user", + parent_id="parent_message_id", + metadata={"key": "value"} + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.conversations.addMessage({ + id: "123e4567-e89b-12d3-a456-426614174000", + content: "Hello, world!", + role: "user", + parentId: "parent_message_id", + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/conversations/123e4567-e89b-12d3-a456-426614174000/messages" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -H "Content-Type: application/json" \\ + -d '{"content": "Hello, world!", "parent_id": "parent_message_id", "metadata": {"key": "value"}}' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def add_message( + id: UUID = Path( + ..., description="The unique identifier of the conversation" + ), + content: str = Body( + ..., description="The content of the message to add" + ), + role: str = Body( + ..., description="The role of the message to add" + ), + parent_id: Optional[str] = Body( + None, description="The ID of the parent message, if any" + ), + metadata: Optional[dict[str, str]] = Body( + None, description="Additional metadata for the message" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedMessageResponse: + """ + Add a new message to a conversation. + + This endpoint adds a new message to an existing conversation. 
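The message endpoints pair with the branch listing further below: editing an existing message forks a new branch rather than mutating history, and the update handler returns both the new message ID and the new branch ID. A hedged end-to-end sketch using the SDK calls from the samples in this file (IDs and response-field access are illustrative):

```
# Conversation threading sketch: add a message, edit it (which forks a branch),
# then list branches. IDs are placeholders taken from the samples.
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

conversation_id = "123e4567-e89b-12d3-a456-426614174000"  # an existing conversation

message = client.conversations.add_message(
    conversation_id,
    content="Hello, world!",
    role="user",
)

edit = client.conversations.update_message(
    conversation_id,
    "message_id_to_update",  # placeholder, as in the sample shown below
    content="Updated content",
)
# The handler returns {"new_message_id": ..., "new_branch_id": ...}.

branches = client.conversations.list_branches(conversation_id)
```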
+ """ + message = Message(role=role, content=content) + return await self.services["management"].add_message( + str(id), + message, + parent_id, + metadata, + ) + + @self.router.post( + "/conversations/{id}/messages/{message_id}", + summary="Update message in conversation", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.conversations.update_message( + "123e4567-e89b-12d3-a456-426614174000", + "message_id_to_update", + content="Updated content" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.conversations.updateMessage({ + id: "123e4567-e89b-12d3-a456-426614174000", + messageId: "message_id_to_update", + content: "Updated content", + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/conversations/123e4567-e89b-12d3-a456-426614174000/messages/message_id_to_update" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -H "Content-Type: application/json" \\ + -d '{"content": "Updated content"}' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def update_message( + id: UUID = Path( + ..., description="The unique identifier of the conversation" + ), + message_id: str = Path( + ..., description="The ID of the message to update" + ), + content: str = Body( + ..., description="The new content for the message" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> dict: + """ + Update an existing message in a conversation. + + This endpoint updates the content of an existing message in a conversation. + """ + new_message_id, new_branch_id = await self.services[ + "management" + ].edit_message(message_id, content) + return { # type: ignore + "new_message_id": new_message_id, + "new_branch_id": new_branch_id, + } + + @self.router.get( + "/conversations/{id}/branches", + summary="List branches in conversation", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.conversations.list_branches("123e4567-e89b-12d3-a456-426614174000") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.conversations.listBranches({ + id: "123e4567-e89b-12d3-a456-426614174000", + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r conversations list-branches 123e4567-e89b-12d3-a456-426614174000 + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/conversations/123e4567-e89b-12d3-a456-426614174000/branches" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def list_branches( + id: UUID = Path( + ..., description="The unique identifier of the conversation" + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. 
Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBranchesResponse: + """ + List all branches in a conversation. + + This endpoint retrieves all branches associated with a specific conversation. + """ + branches_response = await self.services[ + "management" + ].branches_overview( + offset=offset, + limit=limit, + conversation_id=str(id), + ) + + return branches_response["results"], { # type: ignore + "total_entries": branches_response["total_entries"] + } + + # Commented endpoints to be published after more testing + # @self.router.get("/conversations/{id}/branches/{branch_id}/next") + # @self.base_endpoint + # async def get_next_branch( + # id: UUID = Path(...), + # branch_id: str = Path(...), + # auth_user=Depends(self.providers.auth.auth_wrapper), + # ) -> dict: + # """ + # Get the next branch in the conversation. + # """ + # next_branch_id = await self.services.management.get_next_branch(branch_id) + # return {"next_branch_id": next_branch_id} + + # @self.router.get("/conversations/{id}/branches/{branch_id}/previous") + # @self.base_endpoint + # async def get_previous_branch( + # id: UUID = Path(...), + # branch_id: str = Path(...), + # auth_user=Depends(self.providers.auth.auth_wrapper), + # ) -> dict: + # """ + # Get the previous branch in the conversation. + # """ + # prev_branch_id = await self.services.management.get_prev_branch(branch_id) + # return {"prev_branch_id": prev_branch_id} + + # @self.router.post("/conversations/{id}/messages/{message_id}/branch") + # @self.base_endpoint + # async def create_branch( + # id: UUID = Path(...), + # message_id: str = Path(...), + # auth_user=Depends(self.providers.auth.auth_wrapper), + # ) -> dict: + # """ + # Create a new branch starting from a specific message. 
+ # """ + # branch_id = await self.services.management.branch_at_message(message_id) + # return {"branch_id": branch_id} diff --git a/py/core/main/api/v3/documents_router.py b/py/core/main/api/v3/documents_router.py new file mode 100644 index 000000000..907dae474 --- /dev/null +++ b/py/core/main/api/v3/documents_router.py @@ -0,0 +1,1473 @@ +import base64 +import json +import logging +import mimetypes +import textwrap +from io import BytesIO +from typing import Optional +from uuid import UUID + +from fastapi import Body, Depends, File, Form, Path, Query, UploadFile +from fastapi.responses import StreamingResponse +from pydantic import Json + +from core.base import ( + R2RException, + RunType, + UnprocessedChunk, + Workflow, + generate_document_id, + generate_id, +) +from core.base.abstractions import KGCreationSettings, KGRunType +from core.base.api.models import ( + GenericBooleanResponse, + WrappedBooleanResponse, + WrappedChunksResponse, + WrappedCollectionsResponse, + WrappedDocumentResponse, + WrappedDocumentsResponse, + WrappedEntitiesResponse, + WrappedGenericMessageResponse, + WrappedIngestionResponse, + WrappedRelationshipsResponse, +) +from core.providers import ( + HatchetOrchestrationProvider, + SimpleOrchestrationProvider, +) +from core.utils import update_settings_from_dict + +from .base_router import BaseRouterV3 + +logger = logging.getLogger() +MAX_CHUNKS_PER_REQUEST = 1024 * 100 + + +class DocumentsRouter(BaseRouterV3): + def __init__( + self, + providers, + services, + orchestration_provider: ( + HatchetOrchestrationProvider | SimpleOrchestrationProvider + ), + run_type: RunType = RunType.INGESTION, + ): + super().__init__(providers, services, orchestration_provider, run_type) + self._register_workflows() + + # TODO - Remove this legacy method + def _register_workflows(self): + self.orchestration_provider.register_workflows( + Workflow.INGESTION, + self.services["ingestion"], + { + "ingest-files": ( + "Ingest files task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Document created and ingested successfully." + ), + "ingest-chunks": ( + "Ingest chunks task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Document created and ingested successfully." + ), + "update-files": ( + "Update file task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Update task queued successfully." + ), + "update-chunk": ( + "Update chunk task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Chunk update completed successfully." + ), + "update-document-metadata": ( + "Update document metadata task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Document metadata update completed successfully." + ), + "create-vector-index": ( + "Vector index creation task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Vector index creation task completed successfully." + ), + "delete-vector-index": ( + "Vector index deletion task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Vector index deletion task completed successfully." + ), + "select-vector-index": ( + "Vector index selection task queued successfully." + if self.orchestration_provider.config.provider != "simple" + else "Vector index selection task completed successfully." 
+ ), + }, + ) + + def _setup_routes(self): + @self.router.post( + "/documents", + status_code=202, + summary="Create a new document", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.documents.create( + file_path="pg_essay_1.html", + metadata={"metadata_1":"some random metadata"}, + id=None + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.documents.create({ + file: { path: "examples/data/marmeladov.txt", name: "marmeladov.txt" }, + metadata: { title: "marmeladov.txt" }, + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r documents create /path/to/file.txt + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/documents" \\ + -H "Content-Type: multipart/form-data" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -F "file=@pg_essay_1.html;type=text/html" \\ + -F 'metadata={}' \\ + -F 'id=null' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def create_document( + file: Optional[UploadFile] = File( + None, + description="The file to ingest. Exactly one of file, raw_text, or chunks must be provided.", + ), + raw_text: Optional[str] = Form( + None, + description="Raw text content to ingest. Exactly one of file, raw_text, or chunks must be provided.", + ), + chunks: Optional[Json[list[str]]] = Form( + None, + description="Pre-processed text chunks to ingest. Exactly one of file, raw_text, or chunks must be provided.", + ), + id: Optional[UUID] = Form( + None, + description="The ID of the document. If not provided, a new ID will be generated.", + ), + collection_ids: Optional[Json[list[UUID]]] = Form( + None, + description="Collection IDs to associate with the document. If none are provided, the document will be assigned to the user's default collection.", + ), + metadata: Optional[Json[dict]] = Form( + None, + description="Metadata to associate with the document, such as title, description, or custom fields.", + ), + ingestion_config: Optional[Json[dict]] = Form( + None, + description="An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration.", + ), + run_with_orchestration: Optional[bool] = Form( + True, + description="Whether or not ingestion runs with orchestration, default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedIngestionResponse: + """ + Creates a new Document object from an input file or text content. The document will be processed + to create chunks for vector indexing and search. + + Either a file or text content must be provided, but not both. Documents are shared through `Collections` which allow for tightly specified cross-user interactions. + + The ingestion process runs asynchronously and its progress can be tracked using the returned + task_id. 
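+
+            For example, using the Python SDK call shown in the code sample above
+            (the file path and metadata values are placeholders):
+
+                client.documents.create(
+                    file_path="pg_essay_1.html",
+                    metadata={"metadata_1": "some random metadata"},
+                )
+
+            When `run_with_orchestration` is False, ingestion runs synchronously and
+            the returned task_id is None.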
+ """ + if not file and not raw_text and not chunks: + raise R2RException( + status_code=422, + message="Either a `file`, `raw_text`, or `chunks` must be provided.", + ) + if ( + (file and raw_text) + or (file and chunks) + or (raw_text and chunks) + ): + raise R2RException( + status_code=422, + message="Only one of `file`, `raw_text`, or `chunks` may be provided.", + ) + # Check if the user is a superuser + metadata = metadata or {} + if not auth_user.is_superuser: + if "user_id" in metadata and ( + not auth_user.is_superuser + and metadata["user_id"] != str(auth_user.id) + ): + raise R2RException( + status_code=403, + message="Non-superusers cannot set user_id in metadata.", + ) + # If user is not a superuser, set user_id in metadata + metadata["user_id"] = str(auth_user.id) + + if chunks: + if len(chunks) == 0: + raise R2RException("Empty list of chunks provided", 400) + + if len(chunks) > MAX_CHUNKS_PER_REQUEST: + raise R2RException( + f"Maximum of {MAX_CHUNKS_PER_REQUEST} chunks per request", + 400, + ) + document_id = generate_document_id( + str(json.dumps(chunks)), auth_user.id + ) + + # FIXME: Metadata doesn't seem to be getting passed through + raw_chunks_for_doc = [ + UnprocessedChunk( + text=chunk, metadata=metadata, id=generate_id() + ) + for chunk in chunks + ] + + # Prepare workflow input + workflow_input = { + "document_id": str(document_id), + "chunks": [ + chunk.model_dump() for chunk in raw_chunks_for_doc + ], + "metadata": metadata, # Base metadata for the document + "user": auth_user.model_dump_json(), + } + + # TODO - Modify create_chunks so that we can add chunks to existing document + + if run_with_orchestration: + # Run ingestion with orchestration + raw_message = ( + await self.orchestration_provider.run_workflow( + "ingest-chunks", + {"request": workflow_input}, + options={ + "additional_metadata": { + "document_id": str(document_id), + } + }, + ) + ) + raw_message["document_id"] = str(document_id) + return raw_message + + else: + logger.info( + "Running chunk ingestion without orchestration." 
+ ) + from core.main.orchestration import ( + simple_ingestion_factory, + ) + + simple_ingestor = simple_ingestion_factory( + self.services["ingestion"] + ) + await simple_ingestor["ingest-chunks"](workflow_input) + + return { # type: ignore + "message": "Document created and ingested successfully.", + "document_id": str(document_id), + "task_id": None, + } + + else: + if file: + file_data = await self._process_file(file) + content_length = len(file_data["content"]) + file_content = BytesIO( + base64.b64decode(file_data["content"]) + ) + + file_data.pop("content", None) + document_id = id or generate_document_id( + file_data["filename"], auth_user.id + ) + elif raw_text: + content_length = len(raw_text) + file_content = BytesIO(raw_text.encode("utf-8")) + document_id = id or generate_document_id( + raw_text, auth_user.id + ) + file_data = { + "filename": "N/A", + "content_type": "text/plain", + } + else: + raise R2RException( + status_code=422, + message="Either a file or content must be provided.", + ) + + workflow_input = { + "file_data": file_data, + "document_id": str(document_id), + "collection_ids": collection_ids, + "metadata": metadata, + "ingestion_config": ingestion_config, + "user": auth_user.model_dump_json(), + "size_in_bytes": content_length, + "is_update": False, + } + + file_name = file_data["filename"] + await self.providers.database.store_file( + document_id, + file_name, + file_content, + file_data["content_type"], + ) + + if run_with_orchestration: + raw_message: dict[str, str | None] = await self.orchestration_provider.run_workflow( # type: ignore + "ingest-files", + {"request": workflow_input}, + options={ + "additional_metadata": { + "document_id": str(document_id), + } + }, + ) + raw_message["document_id"] = str(document_id) + return raw_message # type: ignore + else: + logger.info( + f"Running ingestion without orchestration for file {file_name} and document_id {document_id}." + ) + # TODO - Clean up implementation logic here to be more explicitly `synchronous` + from core.main.orchestration import simple_ingestion_factory + + simple_ingestor = simple_ingestion_factory( + self.services["ingestion"] + ) + await simple_ingestor["ingest-files"](workflow_input) + return { # type: ignore + "message": "Document created and ingested successfully.", + "document_id": str(document_id), + "task_id": None, + } + + @self.router.get( + "/documents", + summary="List documents", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.documents.list( + limit=10, + offset=0 + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.documents.list({ + limit: 10, + offset: 0, + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r documents create /path/to/file.txt + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/documents" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_documents( + ids: list[str] = Query( + [], + description="A list of document IDs to retrieve. 
If not provided, all documents will be returned.", + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + include_summary_embeddings: int = Query( + False, + description="Specifies whether or not to include embeddings of each document summary.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedDocumentsResponse: + """ + Returns a paginated list of documents the authenticated user has access to. + + Results can be filtered by providing specific document IDs. Regular users will only see + documents they own or have access to through collections. Superusers can see all documents. + + The documents are returned in order of last modification, with most recent first. + """ + requesting_user_id = ( + None if auth_user.is_superuser else [auth_user.id] + ) + filter_collection_ids = ( + None if auth_user.is_superuser else auth_user.collection_ids + ) + + document_uuids = [UUID(document_id) for document_id in ids] + documents_overview_response = await self.services[ + "management" + ].documents_overview( + user_ids=requesting_user_id, + collection_ids=filter_collection_ids, + document_ids=document_uuids, + offset=offset, + limit=limit, + ) + if not include_summary_embeddings: + for document in documents_overview_response["results"]: + document.summary_embedding = None + + return ( # type: ignore + documents_overview_response["results"], + { + "total_entries": documents_overview_response[ + "total_entries" + ] + }, + ) + + @self.router.get( + "/documents/{id}", + summary="Retrieve a document", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.documents.retrieve( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.documents.retrieve({ + id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r documents retrieve b4ac4dd6-5f27-596e-a55b-7cf242ca30aa + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/documents/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_document( + id: UUID = Path( + ..., + description="The ID of the document to retrieve.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedDocumentResponse: + """ + Retrieves detailed information about a specific document by its ID. + + This endpoint returns the document's metadata, status, and system information. It does not + return the document's content - use the `/documents/{id}/download` endpoint for that. + + Users can only retrieve documents they own or have access to through collections. + Superusers can retrieve any document. 
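+
+            For example, using the Python SDK call shown in the code sample above:
+            `client.documents.retrieve(id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa")`.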
+ """ + request_user_ids = ( + None if auth_user.is_superuser else [auth_user.id] + ) + filter_collection_ids = ( + None if auth_user.is_superuser else auth_user.collection_ids + ) + + documents_overview_response = await self.services[ + "management" + ].documents_overview( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + user_ids=request_user_ids, + collection_ids=filter_collection_ids, + document_ids=[id], + offset=0, + limit=100, + ) + results = documents_overview_response["results"] + if len(results) == 0: + raise R2RException("Document not found.", 404) + + return results[0] + + @self.router.get( + "/documents/{id}/chunks", + summary="List document chunks", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.documents.list_chunks( + id="32b6a70f-a995-5c51-85d2-834f06283a1e" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.documents.listChunks({ + id: "32b6a70f-a995-5c51-85d2-834f06283a1e", + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r documents list-chunks b4ac4dd6-5f27-596e-a55b-7cf242ca30aa + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/documents/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa/chunks" \\ + -H "Authorization: Bearer YOUR_API_KEY"\ + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def list_chunks( + id: UUID = Path( + ..., + description="The ID of the document to retrieve chunks for.", + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + include_vectors: Optional[bool] = Query( + False, + description="Whether to include vector embeddings in the response.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedChunksResponse: + """ + Retrieves the text chunks that were generated from a document during ingestion. + Chunks represent semantic sections of the document and are used for retrieval + and analysis. + + Users can only access chunks from documents they own or have access to through + collections. Vector embeddings are only included if specifically requested. + + Results are returned in chunk sequence order, representing their position in + the original document. 
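+
+            For example, using the Python SDK call shown in the code sample above:
+            `client.documents.list_chunks(id="32b6a70f-a995-5c51-85d2-834f06283a1e")`.
+            Set the `include_vectors` query parameter to true on the request to
+            include the stored embeddings.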
+ """ + list_document_chunks = await self.services[ + "management" + ].list_document_chunks(id, offset, limit, include_vectors) + + if not list_document_chunks["results"]: + raise R2RException( + "No chunks found for the given document ID.", 404 + ) + + is_owner = str( + list_document_chunks["results"][0].get("owner_id") + ) == str(auth_user.id) + document_collections = await self.services[ + "management" + ].collections_overview( + offset=0, + limit=-1, + document_ids=[id], + ) + + user_has_access = ( + is_owner + or set(auth_user.collection_ids).intersection( + { + ele.collection_id + for ele in document_collections["results"] + } + ) + != set() + ) + + if not user_has_access and not auth_user.is_superuser: + raise R2RException( + "Not authorized to access this document's chunks.", 403 + ) + + return ( # type: ignore + list_document_chunks["results"], + {"total_entries": list_document_chunks["total_entries"]}, + ) + + @self.router.get( + "/documents/{id}/download", + response_class=StreamingResponse, + summary="Download document content", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.documents.download( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.documents.download({ + id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/documents/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa/download" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_document_file( + id: str = Path(..., description="Document ID"), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> StreamingResponse: + """ + Downloads the original file content of a document. + + For uploaded files, returns the original file with its proper MIME type. + For text-only documents, returns the content as plain text. + + Users can only download documents they own or have access to through collections. + """ + try: + document_uuid = UUID(id) + except ValueError: + raise R2RException( + status_code=422, message="Invalid document ID format." 
+ ) + + # Retrieve the document's information + documents_overview_response = await self.services[ + "management" + ].documents_overview( + user_ids=None, + collection_ids=None, + document_ids=[document_uuid], + offset=0, + limit=1, + ) + + if not documents_overview_response["results"]: + raise R2RException("Document not found.", 404) + + document = documents_overview_response["results"][0] + + is_owner = str(document.user_id) == str(auth_user.id) + + if not auth_user.is_superuser and not is_owner: + document_collections = await self.services[ + "management" + ].collections_overview( + offset=0, + limit=-1, + document_ids=[document_uuid], + ) + + document_collection_ids = { + str(ele.id) for ele in document_collections["results"] + } + + user_collection_ids = set( + str(cid) for cid in auth_user.collection_ids + ) + + has_collection_access = user_collection_ids.intersection( + document_collection_ids + ) + + if not has_collection_access: + raise R2RException( + "Not authorized to access this document.", 403 + ) + + file_tuple = await self.services["management"].download_file( + document_uuid + ) + if not file_tuple: + raise R2RException(status_code=404, message="File not found.") + + file_name, file_content, file_size = file_tuple + + mime_type, _ = mimetypes.guess_type(file_name) + if not mime_type: + mime_type = "application/octet-stream" + + async def file_stream(): + chunk_size = 1024 * 1024 # 1MB + while True: + data = file_content.read(chunk_size) + if not data: + break + yield data + + return StreamingResponse( + file_stream(), + media_type=mime_type, + headers={ + "Content-Disposition": f'inline; filename="{file_name}"', + "Content-Length": str(file_size), + }, + ) + + @self.router.delete( + "/documents/by-filter", + summary="Delete documents by filter", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + response = client.documents.delete_by_filter( + filters={"document_type": {"$eq": "txt"}} + ) + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/documents/by-filter?filters=%7B%22document_type%22%3A%7B%22%24eq%22%3A%22text%22%7D%2C%22created_at%22%3A%7B%22%24lt%22%3A%222023-01-01T00%3A00%3A00Z%22%7D%7D" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def delete_document_by_filter( + filters: Json[dict] = Body( + ..., description="JSON-encoded filters" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Delete documents based on provided filters. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. Deletion requests are limited to a user's own documents. + """ + + filters_dict = { + "$and": [{"user_id": {"$eq": str(auth_user.id)}}, filters] + } + await self.services["management"].delete(filters=filters_dict) + + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.delete( + "/documents/{id}", + summary="Delete a document", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + response = client.documents.delete( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.documents.delete({ + id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r documents delete b4ac4dd6-5f27-596e-a55b-7cf242ca30aa + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/documents/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def delete_document_by_id( + id: UUID = Path(..., description="Document ID"), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Delete a specific document. All chunks corresponding to the document are deleted, and all other references to the document are removed. + + NOTE - Deletions do not yet impact the knowledge graph or other derived data. This feature is planned for a future release. + """ + filters = { + "$and": [ + {"id": {"$eq": str(auth_user.id)}}, + {"id": {"$eq": id}}, + ] + } + await self.services["management"].delete(filters=filters) + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.get( + "/documents/{id}/collections", + summary="List document collections", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.documents.list_collections( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", offset=0, limit=10 + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.documents.listCollections({ + id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r documents list-collections b4ac4dd6-5f27-596e-a55b-7cf242ca30aa + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/documents/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa/collections" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_document_collections( + id: str = Path(..., description="Document ID"), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedCollectionsResponse: + """ + Retrieves all collections that contain the specified document. This endpoint is restricted + to superusers only and provides a system-wide view of document organization. + + Collections are used to organize documents and manage access control. A document can belong + to multiple collections, and users can access documents through collection membership. 
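+
+            For example, using the Python SDK call shown in the code sample above:
+            `client.documents.list_collections(id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", offset=0, limit=10)`.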
+ + The results are paginated and ordered by collection creation date, with the most recently + created collections appearing first. + + NOTE - This endpoint is only available to superusers, it will be extended to regular users in a future release. + """ + if not auth_user.is_superuser: + raise R2RException( + "Only a superuser can get the collections belonging to a document.", + 403, + ) + + collections_response = await self.services[ + "management" + ].collections_overview( + offset=offset, + limit=limit, + document_ids=[UUID(id)], # Convert string ID to UUID + ) + + return collections_response["results"], { # type: ignore + "total_entries": collections_response["total_entries"] + } + + @self.router.post( + "/documents/{id}/extract", + summary="Extract entities and relationships", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.documents.extract( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + ) + """ + ), + }, + ], + }, + ) + @self.base_endpoint + async def extract( + id: UUID = Path( + ..., + description="The ID of the document to extract entities and relationships from.", + ), + run_type: KGRunType = Body( + default=KGRunType.RUN, + description="Whether to return an estimate of the creation cost or to actually extract the document.", + ), + settings: Optional[KGCreationSettings] = Body( + default=None, + description="Settings for the entities and relationships extraction process.", + ), + run_with_orchestration: Optional[bool] = Body( + default=True, + description="Whether to run the entities and relationships extraction process with orchestration.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedGenericMessageResponse: + """ + Extracts entities and relationships from a document. + + The entities and relationships extraction process involves: + + 1. Parsing documents into semantic chunks + + 2. Extracting entities and relationships using LLMs + + 3. Storing the created entities and relationships in the knowledge graph + + 4. 
Preserving the document's metadata and content, and associating the elements with collections the document belongs to + """ + + settings = settings.dict() if settings else None # type: ignore + if not auth_user.is_superuser: + raise R2RException( + "Only a superuser can extract entities and relationships from a document.", + 403, + ) + + # If no run type is provided, default to estimate + if not run_type: + run_type = KGRunType.ESTIMATE + + # Apply runtime settings overrides + server_graph_creation_settings = ( + self.providers.database.config.graph_creation_settings + ) + + if settings: + server_graph_creation_settings = update_settings_from_dict( + server_settings=server_graph_creation_settings, + settings_dict=settings, # type: ignore + ) + + if run_type is KGRunType.ESTIMATE: + return { # type: ignore + "message": "Estimate retrieved successfully", + "task_id": None, + "id": id, + "estimate": await self.services[ + "kg" + ].get_creation_estimate( + document_id=id, + graph_creation_settings=server_graph_creation_settings, + ), + } + + if run_with_orchestration: + workflow_input = { + "document_id": str(id), + "graph_creation_settings": server_graph_creation_settings.model_dump_json(), + "user": auth_user.json(), + } + + return await self.orchestration_provider.run_workflow( + "extract-triples", {"request": workflow_input}, {} + ) + else: + from core.main.orchestration import simple_kg_factory + + logger.info("Running extract-triples without orchestration.") + simple_kg = simple_kg_factory(self.services["kg"]) + await simple_kg["extract-triples"](workflow_input) + return { # type: ignore + "message": "Graph created successfully.", + "task_id": None, + } + + @self.router.get( + "/documents/{id}/entities", + summary="Lists the entities from the document", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.documents.extract( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + ) + """ + ), + }, + ], + }, + ) + @self.base_endpoint + async def get_entities( + id: UUID = Path( + ..., + description="The ID of the document to retrieve entities from.", + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + include_embeddings: Optional[bool] = Query( + False, + description="Whether to include vector embeddings in the response.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedEntitiesResponse: + """ + Retrieves the entities that were extracted from a document. These represent + important semantic elements like people, places, organizations, concepts, etc. + + Users can only access entities from documents they own or have access to through + collections. Entity embeddings are only included if specifically requested. + + Results are returned in the order they were extracted from the document. 
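+
+            For example, a direct REST call (the parameters below map to query
+            parameters on this route):
+
+                GET /v3/documents/{id}/entities?offset=0&limit=100&include_embeddings=false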
+ """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + # First check if the document exists and user has access + documents_overview_response = await self.services[ + "management" + ].documents_overview( + user_ids=None if auth_user.is_superuser else [auth_user.id], + collection_ids=( + None + if auth_user.is_superuser + else auth_user.collection_ids + ), + document_ids=[id], + offset=0, + limit=1, + ) + + if not documents_overview_response["results"]: + raise R2RException("Document not found.", 404) + + # Get all entities for this document from the document_entity table + entities, count = ( + await self.providers.database.graph_handler.entities.get( + parent_id=id, + store_type="documents", + offset=offset, + limit=limit, + include_embeddings=include_embeddings, + ) + ) + + return entities, {"total_entries": count} # type: ignore + + @self.router.get( + "/documents/{id}/relationships", + summary="List document relationships", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.documents.list_relationships( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + offset=0, + limit=100 + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.documents.listRelationships({ + id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + offset: 0, + limit: 100, + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r documents list-relationships b4ac4dd6-5f27-596e-a55b-7cf242ca30aa + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/documents/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa/relationships" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_relationships( + id: UUID = Path( + ..., + description="The ID of the document to retrieve relationships for.", + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + entity_names: Optional[list[str]] = Query( + None, + description="Filter relationships by specific entity names.", + ), + relationship_types: Optional[list[str]] = Query( + None, + description="Filter relationships by specific relationship types.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedRelationshipsResponse: + """ + Retrieves the relationships between entities that were extracted from a document. These represent + connections and interactions between entities found in the text. + + Users can only access relationships from documents they own or have access to through + collections. Results can be filtered by entity names and relationship types. + + Results are returned in the order they were extracted from the document. 
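+
+            For example, using the Python SDK call shown in the code sample above:
+            `client.documents.list_relationships(id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", offset=0, limit=100)`.
+            The `entity_names` and `relationship_types` query parameters can be used
+            to narrow the results.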
+ """ + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + # First check if the document exists and user has access + documents_overview_response = await self.services[ + "management" + ].documents_overview( + user_ids=None if auth_user.is_superuser else [auth_user.id], + collection_ids=( + None + if auth_user.is_superuser + else auth_user.collection_ids + ), + document_ids=[id], + offset=0, + limit=1, + ) + + if not documents_overview_response["results"]: + raise R2RException("Document not found.", 404) + + # Get relationships for this document + relationships, count = ( + await self.providers.database.graph_handler.relationships.get( + parent_id=id, + store_type="documents", + entity_names=entity_names, + relationship_types=relationship_types, + offset=offset, + limit=limit, + ) + ) + + return relationships, {"total_entries": count} # type: ignore + + @staticmethod + async def _process_file(file): + import base64 + + content = await file.read() + return { + "filename": file.filename, + "content": base64.b64encode(content).decode("utf-8"), + "content_type": file.content_type, + } diff --git a/py/core/main/api/v3/graph_router.py b/py/core/main/api/v3/graph_router.py new file mode 100644 index 000000000..c0ac96b0c --- /dev/null +++ b/py/core/main/api/v3/graph_router.py @@ -0,0 +1,1877 @@ +import logging +import textwrap +from typing import Optional +from uuid import UUID + +from fastapi import Body, Depends, Path, Query + +from core.base import KGEnrichmentStatus, R2RException, RunType, Workflow +from core.base.abstractions import KGRunType +from core.base.api.models import ( + GenericBooleanResponse, + WrappedBooleanResponse, + WrappedCommunitiesResponse, + WrappedCommunityResponse, + WrappedEntitiesResponse, + WrappedEntityResponse, + WrappedGraphResponse, + WrappedGraphsResponse, + WrappedRelationshipResponse, + WrappedRelationshipsResponse, +) +from core.providers import ( + HatchetOrchestrationProvider, + SimpleOrchestrationProvider, +) +from core.utils import ( + generate_default_user_collection_id, + update_settings_from_dict, +) + +from .base_router import BaseRouterV3 + +logger = logging.getLogger() + + +class GraphRouter(BaseRouterV3): + def __init__( + self, + providers, + services, + orchestration_provider: ( + HatchetOrchestrationProvider | SimpleOrchestrationProvider + ), + run_type: RunType = RunType.KG, + ): + super().__init__(providers, services, orchestration_provider, run_type) + self._register_workflows() + + def _register_workflows(self): + + workflow_messages = {} + if self.orchestration_provider.config.provider == "hatchet": + workflow_messages["extract-triples"] = ( + "Graph creation task queued successfully." + ) + workflow_messages["build-communities"] = ( + "Graph enrichment task queued successfully." + ) + workflow_messages["entity-deduplication"] = ( + "KG Entity Deduplication task queued successfully." + ) + else: + workflow_messages["extract-triples"] = ( + "Document entities and relationships extracted successfully. To generate GraphRAG communities, POST to `/graphs//communities/build` with a collection this document belongs to." + ) + workflow_messages["build-communities"] = ( + "Graph communities created successfully. You can view the communities at http://localhost:7272/v2/communities" + ) + workflow_messages["entity-deduplication"] = ( + "KG Entity Deduplication completed successfully." 
+ ) + + self.orchestration_provider.register_workflows( + Workflow.KG, + self.services["kg"], + workflow_messages, + ) + + async def _deduplicate_entities( + self, + collection_id: UUID, + settings, + run_type: Optional[KGRunType] = KGRunType.ESTIMATE, + run_with_orchestration: bool = True, + auth_user=None, + ): + """Deduplicates entities in the knowledge graph using LLM-based analysis. + + The deduplication process: + 1. Groups potentially duplicate entities by name/type + 2. Uses LLM analysis to determine if entities refer to same thing + 3. Merges duplicate entities while preserving relationships + 4. Updates all references to use canonical entity IDs + + Args: + id (UUID): Graph containing the entities + settings (dict, optional): Deduplication settings including: + - graph_entity_deduplication_type (str): Deduplication method (e.g. "by_name") + - graph_entity_deduplication_prompt (str): Custom prompt for analysis + - max_description_input_length (int): Max chars for entity descriptions + - generation_config (dict): LLM generation parameters + run_type (KGRunType): Whether to estimate cost or run deduplication + run_with_orchestration (bool): Whether to run async with task queue + auth_user: Authenticated user making request + + Returns: + Result containing: + message (str): Status message + task_id (UUID): Async task ID if run with orchestration + + Raises: + R2RException: If user unauthorized or deduplication fails + """ + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can deduplicate a graphs entities", 403 + ) + + server_settings = ( + self.providers.database.config.graph_entity_deduplication_settings + ) + if settings: + server_settings = update_settings_from_dict( + server_settings, settings + ) + + # Return cost estimate if requested + if run_type == KGRunType.ESTIMATE: + return await self.services["kg"].get_deduplication_estimate( + collection_id, server_settings + ) + + workflow_input = { + "graph_id": str(collection_id), + "graph_entity_deduplication_settings": server_settings.model_dump_json(), + "user": auth_user.model_dump_json(), + } + + if run_with_orchestration: + return await self.orchestration_provider.run_workflow( # type: ignore + "entity-deduplication", {"request": workflow_input}, {} + ) + else: + from core.main.orchestration import simple_kg_factory + + simple_kg = simple_kg_factory(self.services["kg"]) + await simple_kg["entity-deduplication"](workflow_input) + return { # type: ignore + "message": "Entity deduplication completed successfully.", + "task_id": None, + } + + async def _get_collection_id( + self, collection_id: Optional[UUID], auth_user + ) -> UUID: + """Helper method to get collection ID, using default if none provided""" + if collection_id is None: + return generate_default_user_collection_id(auth_user.id) + return collection_id + + def _setup_routes(self): + @self.router.get( + "/graphs", + summary="List graphs", + openapi_extra={ + "x-codeSamples": [ + { # TODO: Verify + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + response = client.graphs.list() + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.list({}); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def list_graphs( + collection_ids: list[str] = Query( + [], + description="A list of graph IDs to retrieve. If not provided, all graphs will be returned.", + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedGraphsResponse: + """ + Returns a paginated list of graphs the authenticated user has access to. + + Results can be filtered by providing specific graph IDs. Regular users will only see + graphs they own or have access to. Superusers can see all graphs. + + The graphs are returned in order of last modification, with most recent first. + """ + requesting_user_id = ( + None if auth_user.is_superuser else [auth_user.id] + ) + + graph_uuids = [UUID(graph_id) for graph_id in collection_ids] + + list_graphs_response = await self.services["kg"].list_graphs( + # user_ids=requesting_user_id, + graph_ids=graph_uuids, + offset=offset, + limit=limit, + ) + + return ( # type: ignore + list_graphs_response["results"], + {"total_entries": list_graphs_response["total_entries"]}, + ) + + @self.router.get( + "/graphs/{collection_id}", + summary="Retrieve graph details", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.graphs.get( + collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + )""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.retrieve({ + collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/graphs/d09dedb1-b2ab-48a5-b950-6e1f464d83e7" \\ + -H "Authorization: Bearer YOUR_API_KEY" """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_graph( + collection_id: UUID = Path(...), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedGraphResponse: + """ + Retrieves detailed information about a specific graph by ID. 
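+
+            For example, using the Python SDK call shown in the code sample above:
+            `client.graphs.get(collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7")`.
+            The requesting user must belong to the collection associated with the
+            graph.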
+ """ + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the specified collection associated with the given graph.", + 403, + ) + + list_graphs_response = await self.services["kg"].list_graphs( + # user_ids=None, + graph_ids=[collection_id], + offset=0, + limit=1, + ) + return list_graphs_response["results"][0] + + @self.router.post( + "/graphs/{collection_id}/communities/build", + ) + @self.base_endpoint + async def build_communities( + collection_id: UUID = Path( + ..., description="The unique identifier of the collection" + ), + run_type: Optional[KGRunType] = Body( + default=KGRunType.ESTIMATE, + description="Run type for the graph enrichment process.", + ), + graph_enrichment_settings: Optional[dict] = Body( + default=None, + description="Settings for the graph enrichment process.", + ), + run_with_orchestration: Optional[bool] = Body(True), + auth_user=Depends(self.providers.auth.auth_wrapper), + ): + """ + Creates communities in the graph by analyzing entity relationships and similarities. + + Communities are created through the following process: + 1. Analyzes entity relationships and metadata to build a similarity graph + 2. Applies advanced community detection algorithms (e.g. Leiden) to identify densely connected groups + 3. Creates hierarchical community structure with multiple granularity levels + 4. Generates natural language summaries and statistical insights for each community + + The resulting communities can be used to: + - Understand high-level graph structure and organization + - Identify key entity groupings and their relationships + - Navigate and explore the graph at different levels of detail + - Generate insights about entity clusters and their characteristics + + The community detection process is configurable through settings like: + - Community detection algorithm parameters + - Summary generation prompt + """ + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can build communities", 403 + ) + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + # If no collection ID is provided, use the default user collection + # id = generate_default_user_collection_id(auth_user.id) + + # If no run type is provided, default to estimate + if not run_type: + run_type = KGRunType.ESTIMATE + + # Apply runtime settings overrides + server_graph_enrichment_settings = ( + self.providers.database.config.graph_enrichment_settings + ) + if graph_enrichment_settings: + server_graph_enrichment_settings = update_settings_from_dict( + server_graph_enrichment_settings, graph_enrichment_settings + ) + + # If the run type is estimate, return an estimate of the enrichment cost + # if run_type is KGRunType.ESTIMATE: + # return await self.services["kg"].get_enrichment_estimate( + # collection_id=id, + # graph_enrichment_settings=server_graph_enrichment_settings, + # ) + + # Otherwise, run the enrichment workflow + # else: + if run_with_orchestration: + workflow_input = { + "collection_id": str(collection_id), + "graph_enrichment_settings": server_graph_enrichment_settings.model_dump_json(), + "user": auth_user.json(), + } + + return await self.orchestration_provider.run_workflow( # type: ignore + "build-communities", {"request": workflow_input}, {} + ) + else: + from 
core.main.orchestration import simple_kg_factory + + logger.info("Running build-communities without orchestration.") + simple_kg = simple_kg_factory(self.services["kg"]) + await simple_kg["build-communities"](workflow_input) + return { + "message": "Graph communities created successfully.", + "task_id": None, + } + + @self.router.post( + "/graphs/{collection_id}/reset", + summary="Reset a graph back to the initial state.", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.graphs.reset( + collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + )""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.reset({ + collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/graphs/d09dedb1-b2ab-48a5-b950-6e1f464d83e7/reset" \\ + -H "Authorization: Bearer YOUR_API_KEY" """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def reset( + collection_id: UUID = Path(...), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Deletes a graph and all its associated data. + + This endpoint permanently removes the specified graph along with all + entities and relationships that belong to only this graph. + The original source entities and relationships extracted from underlying documents are not deleted + and are managed through the document lifecycle. + """ + if not auth_user.is_superuser: + raise R2RException("Only superusers can reset a graph", 403) + + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + await self.services["kg"].reset_graph_v3(id=collection_id) + # await _pull(collection_id, auth_user) + return GenericBooleanResponse(success=True) # type: ignore + + # update graph + @self.router.post( + "/graphs/{collection_id}", + summary="Update graph", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + response = client.graphs.update( + collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + graph={ + "name": "New Name", + "description": "New Description" + } + )""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.update({ + collection_id: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + name: "New Name", + description: "New Description", + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def update_graph( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph to update", + ), + name: Optional[str] = Body( + None, description="The name of the graph" + ), + description: Optional[str] = Body( + None, description="An optional description of the graph" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ): + """ + Update an existing graphs's configuration. + + This endpoint allows updating the name and description of an existing collection. + The user must have appropriate permissions to modify the collection. + """ + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can update graph details", 403 + ) + + if ( + not auth_user.is_superuser + and id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + return await self.services["kg"].update_graph( # type: ignore + collection_id, + name=name, + description=description, + ) + + @self.router.get( + "/graphs/{collection_id}/entities", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.graphs.get_entities(collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.get_entities({ + collection_id: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + }); + } + + main(); + """ + ), + }, + ], + }, + ) + @self.base_endpoint + async def get_entities( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph to list entities from.", + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. 
Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedEntitiesResponse: + """Lists all entities in the graph with pagination support.""" + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + entities, count = await self.services["kg"].get_entities( + parent_id=collection_id, + offset=offset, + limit=limit, + ) + + return entities, { # type: ignore + "total_entries": count, + } + + @self.router.post("/graphs/{collection_id}/entities") + @self.base_endpoint + async def create_entity( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph to add the entity to.", + ), + name: str = Body( + ..., description="The name of the entity to create." + ), + description: Optional[str] = Body( + None, description="The description of the entity to create." + ), + category: Optional[str] = Body( + None, description="The category of the entity to create." + ), + metadata: Optional[dict] = Body( + None, description="The metadata of the entity to create." + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedEntityResponse: + """Creates a new entity in the graph.""" + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + return await self.services["kg"].create_entity( + name=name, + description=description, + parent_id=collection_id, + category=category, + metadata=metadata, + ) + + @self.router.post("/graphs/{collection_id}/relationships") + @self.base_endpoint + async def create_relationship( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph to add the relationship to.", + ), + subject: str = Body( + ..., description="The subject of the relationship to create." + ), + subject_id: UUID = Body( + ..., + description="The ID of the subject of the relationship to create.", + ), + predicate: str = Body( + ..., description="The predicate of the relationship to create." + ), + object: str = Body( + ..., description="The object of the relationship to create." + ), + object_id: UUID = Body( + ..., + description="The ID of the object of the relationship to create.", + ), + description: Optional[str] = Body( + None, + description="The description of the relationship to create.", + ), + weight: Optional[float] = Body( + None, description="The weight of the relationship to create." + ), + metadata: Optional[dict] = Body( + None, description="The metadata of the relationship to create." 
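# Illustrative sketch: paging through every entity in a graph using the
# offset/limit query parameters defined above (limit is capped per request).
# Assumptions for this example only: the server runs at localhost:7272, the
# wrapped response carries the page under "results" and the count under
# "total_entries", and the API key / collection UUID are placeholders.
import requests

BASE_URL = "http://localhost:7272/v3"
COLLECTION_ID = "d09dedb1-b2ab-48a5-b950-6e1f464d83e7"  # placeholder UUID
HEADERS = {"Authorization": "Bearer YOUR_API_KEY"}      # placeholder token

def iter_graph_entities(page_size: int = 100):
    """Yield every entity in the graph, one page at a time."""
    offset = 0
    while True:
        resp = requests.get(
            f"{BASE_URL}/graphs/{COLLECTION_ID}/entities",
            params={"offset": offset, "limit": page_size},
            headers=HEADERS,
        )
        resp.raise_for_status()
        body = resp.json()
        page = body.get("results", [])
        yield from page
        offset += len(page)
        # Stop once a page comes back short or every entry has been seen.
        if not page or offset >= body.get("total_entries", offset):
            break

for entity in iter_graph_entities():
    print(entity)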
+ ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedRelationshipResponse: + """Creates a new relationship in the graph.""" + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can create relationships.", 403 + ) + + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + return await self.services["kg"].create_relationship( + subject=subject, + subject_id=subject_id, + predicate=predicate, + object=object, + object_id=object_id, + description=description, + weight=weight, + metadata=metadata, + parent_id=collection_id, + ) + + @self.router.get( + "/graphs/{collection_id}/entities/{entity_id}", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.graphs.get_entity( + collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + entity_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.get_entity({ + collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + entityId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_entity( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph containing the entity.", + ), + entity_id: UUID = Path( + ..., description="The ID of the entity to retrieve." + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedEntityResponse: + """Retrieves a specific entity by its ID.""" + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + result = await self.providers.database.graph_handler.entities.get( + parent_id=collection_id, + store_type="graphs", + offset=0, + limit=1, + entity_ids=[entity_id], + ) + if len(result) == 0 or len(result[0]) == 0: + raise R2RException("Entity not found", 404) + return result[0][0] + + @self.router.post("/graphs/{collection_id}/entities/{entity_id}") + @self.base_endpoint + async def update_entity( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph containing the entity.", + ), + entity_id: UUID = Path( + ..., description="The ID of the entity to update." + ), + name: Optional[str] = Body( + ..., description="The updated name of the entity." + ), + description: Optional[str] = Body( + None, description="The updated description of the entity." + ), + category: Optional[str] = Body( + None, description="The updated category of the entity." + ), + metadata: Optional[dict] = Body( + None, description="The updated metadata of the entity." 
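# Illustrative sketch: creating two entities and then a relationship between
# them through the routes defined above. Assumptions for this example only:
# the calls are made as a superuser, the wrapped responses expose the created
# record under "results" with an "id" field, and the base URL / API key /
# collection UUID are placeholders.
import requests

BASE_URL = "http://localhost:7272/v3"
COLLECTION_ID = "d09dedb1-b2ab-48a5-b950-6e1f464d83e7"  # placeholder UUID
HEADERS = {"Authorization": "Bearer YOUR_API_KEY"}      # placeholder token

def create_entity(name: str, description: str) -> dict:
    resp = requests.post(
        f"{BASE_URL}/graphs/{COLLECTION_ID}/entities",
        json={"name": name, "description": description},
        headers=HEADERS,
    )
    resp.raise_for_status()
    return resp.json()["results"]

aristotle = create_entity("Aristotle", "Greek philosopher")
plato = create_entity("Plato", "Greek philosopher and teacher of Aristotle")

# The request body mirrors the Body(...) parameters of create_relationship.
resp = requests.post(
    f"{BASE_URL}/graphs/{COLLECTION_ID}/relationships",
    json={
        "subject": "Aristotle",
        "subject_id": aristotle["id"],
        "predicate": "student_of",
        "object": "Plato",
        "object_id": plato["id"],
        "description": "Aristotle studied under Plato at the Academy.",
        "weight": 1.0,
    },
    headers=HEADERS,
)
resp.raise_for_status()
print(resp.json()["results"])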
+ ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedEntityResponse: + """Updates an existing entity in the graph.""" + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can update graph entities.", 403 + ) + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + return await self.services["kg"].update_entity( + entity_id=entity_id, + name=name, + category=category, + description=description, + metadata=metadata, + ) + + @self.router.delete( + "/graphs/{collection_id}/entities/{entity_id}", + summary="Remove an entity", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.graphs.remove_entity( + collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + entity_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.removeEntity({ + collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + entityId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def delete_entity( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph to remove the entity from.", + ), + entity_id: UUID = Path( + ..., + description="The ID of the entity to remove from the graph.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """Removes an entity from the graph.""" + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can delete graph details.", 403 + ) + + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + await self.services["kg"].delete_entity( + parent_id=collection_id, + entity_id=entity_id, + ) + + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.get( + "/graphs/{collection_id}/relationships", + description="Lists all relationships in the graph with pagination support.", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + response = client.graphs.list_relationships(collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.listRelationships({ + collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + }); + } + + main(); + """ + ), + }, + ], + }, + ) + @self.base_endpoint + async def get_relationships( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph to list relationships from.", + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedRelationshipsResponse: + """ + Lists all relationships in the graph with pagination support. + """ + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + relationships, count = await self.services["kg"].get_relationships( + parent_id=collection_id, + offset=offset, + limit=limit, + ) + + return relationships, { # type: ignore + "total_entries": count, + } + + @self.router.get( + "/graphs/{collection_id}/relationships/{relationship_id}", + description="Retrieves a specific relationship by its ID.", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.graphs.get_relationship( + collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + relationship_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.getRelationship({ + collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + relationshipId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + }); + } + + main(); + """ + ), + }, + ], + }, + ) + @self.base_endpoint + async def get_relationship( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph containing the relationship.", + ), + relationship_id: UUID = Path( + ..., description="The ID of the relationship to retrieve." 
+ ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedRelationshipResponse: + """Retrieves a specific relationship by its ID.""" + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + results = ( + await self.providers.database.graph_handler.relationships.get( + parent_id=collection_id, + store_type="graphs", + offset=0, + limit=1, + relationship_ids=[relationship_id], + ) + ) + if len(results) == 0 or len(results[0]) == 0: + raise R2RException("Relationship not found", 404) + return results[0][0] + + @self.router.post( + "/graphs/{collection_id}/relationships/{relationship_id}" + ) + @self.base_endpoint + async def update_relationship( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph containing the relationship.", + ), + relationship_id: UUID = Path( + ..., description="The ID of the relationship to update." + ), + subject: Optional[str] = Body( + ..., description="The updated subject of the relationship." + ), + subject_id: Optional[UUID] = Body( + ..., description="The updated subject ID of the relationship." + ), + predicate: Optional[str] = Body( + ..., description="The updated predicate of the relationship." + ), + object: Optional[str] = Body( + ..., description="The updated object of the relationship." + ), + object_id: Optional[UUID] = Body( + ..., description="The updated object ID of the relationship." + ), + description: Optional[str] = Body( + None, + description="The updated description of the relationship.", + ), + weight: Optional[float] = Body( + None, description="The updated weight of the relationship." + ), + metadata: Optional[dict] = Body( + None, description="The updated metadata of the relationship." + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedRelationshipResponse: + """Updates an existing relationship in the graph.""" + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can update graph details", 403 + ) + + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + return await self.services["kg"].update_relationship( + relationship_id=relationship_id, + subject=subject, + subject_id=subject_id, + predicate=predicate, + object=object, + object_id=object_id, + description=description, + weight=weight, + metadata=metadata, + ) + + @self.router.delete( + "/graphs/{collection_id}/relationships/{relationship_id}", + description="Removes a relationship from the graph.", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + response = client.graphs.delete_relationship( + collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + relationship_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.deleteRelationship({ + collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + relationshipId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + }); + } + + main(); + """ + ), + }, + ], + }, + ) + @self.base_endpoint + async def delete_relationship( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph to remove the relationship from.", + ), + relationship_id: UUID = Path( + ..., + description="The ID of the relationship to remove from the graph.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """Removes a relationship from the graph.""" + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can delete a relationship.", 403 + ) + + if ( + not auth_user.is_superuser + and collection_id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + await self.services["kg"].delete_relationship( + parent_id=collection_id, + relationship_id=relationship_id, + ) + + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.post( + "/graphs/{collection_id}/communities", + summary="Create a new community", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.graphs.create_community( + collection_id="9fbe403b-c11c-5aae-8ade-ef22980c3ad1", + name="My Community", + summary="A summary of the community", + findings=["Finding 1", "Finding 2"], + rating=5, + rating_explanation="This is a rating explanation", + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.createCommunity({ + collectionId: "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", + name: "My Community", + summary: "A summary of the community", + findings: ["Finding 1", "Finding 2"], + rating: 5, + ratingExplanation: "This is a rating explanation", + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def create_community( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph to create the community in.", + ), + name: str = Body(..., description="The name of the community"), + summary: str = Body(..., description="A summary of the community"), + findings: Optional[list[str]] = Body( + default=[], description="Findings about the community" + ), + rating: Optional[float] = Body( + default=5, ge=1, le=10, description="Rating between 1 and 10" + ), + rating_explanation: Optional[str] = Body( + default="", description="Explanation for the rating" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedCommunityResponse: + """ + Creates a new community in the graph. 
+ + While communities are typically built automatically via the /graphs/{id}/communities/build endpoint, + this endpoint allows you to manually create your own communities. + + This can be useful when you want to: + - Define custom groupings of entities based on domain knowledge + - Add communities that weren't detected by the automatic process + - Create hierarchical organization structures + - Tag groups of entities with specific metadata + + The created communities will be integrated with any existing automatically detected communities + in the graph's community structure. + """ + if not auth_user.is_superuser: + raise R2RException( + "Only superusers can create a community.", 403 + ) + + if ( + not auth_user.is_superuser + and collection_id not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + return await self.services["kg"].create_community( + parent_id=collection_id, + name=name, + summary=summary, + findings=findings, + rating=rating, + rating_explanation=rating_explanation, + ) + + @self.router.get( + "/graphs/{collection_id}/communities", + summary="List communities", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.graphs.list_communities(collection_id="9fbe403b-c11c-5aae-8ade-ef22980c3ad1") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.listCommunities({ + collectionId: "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_communities( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph to get communities for.", + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedCommunitiesResponse: + """ + Lists all communities in the graph with pagination support. + """ + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + communities, count = await self.services["kg"].get_communities( + parent_id=collection_id, + offset=offset, + limit=limit, + ) + + return communities, { # type: ignore + "total_entries": count, + } + + @self.router.get( + "/graphs/{collection_id}/communities/{community_id}", + summary="Retrieve a community", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + response = client.graphs.get_community(collection_id="9fbe403b-c11c-5aae-8ade-ef22980c3ad1") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.getCommunity({ + collectionId: "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_community( + collection_id: UUID = Path( + ..., + description="The ID of the collection to get communities for.", + ), + community_id: UUID = Path( + ..., + description="The ID of the community to get.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedCommunityResponse: + """ + Retrieves a specific community by its ID. + """ + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + results = await self.services[ + "kg" + ].providers.database.graph_handler.communities.get( + parent_id=collection_id, + community_ids=[community_id], + store_type="graphs", + offset=0, + limit=1, + ) + if len(results) == 0 or len(results[0]) == 0: + raise R2RException("Community not found", 404) + return results[0][0] + + @self.router.delete( + "/graphs/{collection_id}/communities/{community_id}", + summary="Delete a community", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.graphs.delete_community( + collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + community_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.graphs.deleteCommunity({ + collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + communityId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def delete_community( + collection_id: UUID = Path( + ..., + description="The collection ID corresponding to the graph to delete the community from.", + ), + community_id: UUID = Path( + ..., + description="The ID of the community to delete.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ): + if ( + not auth_user.is_superuser + and collection_id not in auth_user.graph_ids + ): + raise R2RException( + "Only superusers can delete communities", 403 + ) + + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + await self.services["kg"].delete_community( + parent_id=collection_id, + community_id=community_id, + ) + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.post( + "/graphs/{collection_id}/communities/{community_id}", + summary="Update community", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do 
client.login(...) + + response = client.graphs.update_community( + collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + community_update={ + "metadata": { + "topic": "Technology", + "description": "Tech companies and products" + } + } + )""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + async function main() { + const response = await client.graphs.updateCommunity({ + collectionId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + communityId: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7", + communityUpdate: { + metadata: { + topic: "Technology", + description: "Tech companies and products" + } + } + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def update_community( + collection_id: UUID = Path(...), + community_id: UUID = Path(...), + name: Optional[str] = Body(None), + summary: Optional[str] = Body(None), + findings: Optional[list[str]] = Body(None), + rating: Optional[float] = Body(default=None, ge=1, le=10), + rating_explanation: Optional[str] = Body(None), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedCommunityResponse: + """ + Updates an existing community in the graph. + """ + if ( + not auth_user.is_superuser + and collection_id not in auth_user.graph_ids + ): + raise R2RException( + "Only superusers can update communities.", 403 + ) + + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + return await self.services["kg"].update_community( + community_id=community_id, + name=name, + summary=summary, + findings=findings, + rating=rating, + rating_explanation=rating_explanation, + ) + + @self.router.post( + "/graphs/{collection_id}/pull", + summary="Pull latest entities to the graph", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response = client.graphs.pull( + collection_id="d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + )""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + async function main() { + const response = await client.graphs.pull({ + collection_id: "d09dedb1-b2ab-48a5-b950-6e1f464d83e7" + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def pull( + collection_id: UUID = Path( + ..., description="The ID of the graph to initialize." + ), + force: Optional[bool] = Body( + False, + description="If true, forces a re-pull of all entities and relationships.", + ), + # document_ids: list[UUID] = Body( + # ..., description="List of document IDs to add to the graph." + # ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Adds documents to a graph by copying their entities and relationships. + + This endpoint: + 1. Copies document entities to the graphs_entities table + 2. Copies document relationships to the graphs_relationships table + 3. 
Associates the documents with the graph + + When a document is added: + - Its entities and relationships are copied to graph-specific tables + - Existing entities/relationships are updated by merging their properties + - The document ID is recorded in the graph's document_ids array + + Documents added to a graph will contribute their knowledge to: + - Graph analysis and querying + - Community detection + - Knowledge graph enrichment + + The user must have access to both the graph and the documents being added. + """ + # Check user permissions for graph + if not auth_user.is_superuser: + raise R2RException("Only superusers can `pull` a graph.", 403) + + if ( + # not auth_user.is_superuser + collection_id + not in auth_user.collection_ids + ): + raise R2RException( + "The currently authenticated user does not have access to the collection associated with the given graph.", + 403, + ) + + list_graphs_response = await self.services["kg"].list_graphs( + # user_ids=None, + graph_ids=[collection_id], + offset=0, + limit=1, + ) + if len(list_graphs_response["results"]) == 0: + raise R2RException("Graph not found", 404) + collection_id = list_graphs_response["results"][0].collection_id + documents = [] + document_req = ( + await self.providers.database.collections_handler.documents_in_collection( + collection_id, offset=0, limit=100 + ) + )["results"] + documents.extend(document_req) + while len(document_req) == 100: + document_req = ( + await self.providers.database.collections_handler.documents_in_collection( + collection_id, offset=len(documents), limit=100 + ) + )["results"] + documents.extend(document_req) + + success = False + + for document in documents: + # TODO - Add better checks for user permissions + if ( + not auth_user.is_superuser + and document.id + not in auth_user.document_ids # TODO - extend to include checks on collections + ): + raise R2RException( + f"The currently authenticated user does not have access to document {document.id}", + 403, + ) + entities = ( + await self.providers.database.graph_handler.entities.get( + parent_id=document.id, + store_type="documents", + offset=0, + limit=100, + ) + ) + has_document = ( + await self.providers.database.graph_handler.has_document( + collection_id, document.id + ) + ) + if has_document: + logger.info( + f"Document {document.id} is already in graph {collection_id}, skipping." + ) + continue + if len(entities[0]) == 0: + if not force: + logger.warning( + f"Document {document.id} has no entities, extraction may not have been called, skipping." + ) + continue + else: + logger.warning( + f"Document {document.id} has no entities, but force=True, continuing." + ) + + success = ( + await self.providers.database.graph_handler.add_documents( + id=collection_id, + document_ids=[document.id], + ) + ) + if not success: + logger.warning( + f"No documents were added to graph {collection_id}, marking as failed." 
+ ) + + if success: + await self.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.SUCCESS, + ) + + return GenericBooleanResponse(success=success) # type: ignore diff --git a/py/core/main/api/v3/indices_router.py b/py/core/main/api/v3/indices_router.py new file mode 100644 index 000000000..b9d61f300 --- /dev/null +++ b/py/core/main/api/v3/indices_router.py @@ -0,0 +1,625 @@ +# TODO - Move indices to 'id' basis +# TODO - Implement update index +# TODO - Implement index data model + +import logging +import textwrap +from typing import Optional + +from fastapi import Body, Depends, Path, Query + +from core.base import IndexConfig, R2RException, RunType +from core.base.abstractions import VectorTableName +from core.base.api.models import ( + GenericMessageResponse, + WrappedGenericMessageResponse, + WrappedListVectorIndicesResponse, +) +from core.providers import ( + HatchetOrchestrationProvider, + SimpleOrchestrationProvider, +) + +from .base_router import BaseRouterV3 + +logger = logging.getLogger() + + +class IndicesRouter(BaseRouterV3): + + def __init__( + self, + providers, + services, + orchestration_provider: ( + HatchetOrchestrationProvider | SimpleOrchestrationProvider + ), + run_type: RunType = RunType.INGESTION, + ): + super().__init__(providers, services, orchestration_provider, run_type) + + def _setup_routes(self): + + ## TODO - Allow developer to pass the index id with the request + @self.router.post( + "/indices", + summary="Create Vector Index", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + # Create an HNSW index for efficient similarity search + result = client.indices.create( + config={ + "table_name": "vectors", # The table containing vector embeddings + "index_method": "hnsw", # Hierarchical Navigable Small World graph + "index_measure": "cosine_distance", # Similarity measure + "index_arguments": { + "m": 16, # Number of connections per layer + "ef_construction": 64,# Size of dynamic candidate list for construction + "ef": 40, # Size of dynamic candidate list for search + }, + "index_name": "my_document_embeddings_idx", + "index_column": "embedding", + "concurrently": True # Build index without blocking table writes + }, + run_with_orchestration=True # Run as orchestrated task for large indices + ) + + # Create an IVF-Flat index for balanced performance + result = client.indices.create( + config={ + "table_name": "vectors", + "index_method": "ivf_flat", # Inverted File with Flat storage + "index_measure": "l2_distance", + "index_arguments": { + "lists": 100, # Number of cluster centroids + "probe": 10, # Number of clusters to search + }, + "index_name": "my_ivf_embeddings_idx", + "index_column": "embedding", + "concurrently": True + } + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.indicies.create({ + config: { + tableName: "vectors", + indexMethod: "hnsw", + indexMeasure: "cosine_distance", + indexArguments: { + m: 16, + ef_construction: 64, + ef: 40 + }, + indexName: "my_document_embeddings_idx", + indexColumn: "embedding", + concurrently: true + }, + runWithOrchestration: true + }); + } + + main(); + """ + ), + }, + { + "lang": "Shell", + 
"source": textwrap.dedent( + """ + # Create HNSW Index + curl -X POST "https://api.example.com/indices" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -d '{ + "config": { + "table_name": "vectors", + "index_method": "hnsw", + "index_measure": "cosine_distance", + "index_arguments": { + "m": 16, + "ef_construction": 64, + "ef": 40 + }, + "index_name": "my_document_embeddings_idx", + "index_column": "embedding", + "concurrently": true + }, + "run_with_orchestration": true + }' + + # Create IVF-Flat Index + curl -X POST "https://api.example.com/indices" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -d '{ + "config": { + "table_name": "vectors", + "index_method": "ivf_flat", + "index_measure": "l2_distance", + "index_arguments": { + "lists": 100, + "probe": 10 + }, + "index_name": "my_ivf_embeddings_idx", + "index_column": "embedding", + "concurrently": true + } + }' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def create_index( + config: IndexConfig, + run_with_orchestration: Optional[bool] = Body( + True, + description="Whether to run index creation as an orchestrated task (recommended for large indices)", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedGenericMessageResponse: + """ + Create a new vector similarity search index in over the target table. Allowed tables include 'vectors', 'entity', 'document_collections'. + Vectors correspond to the chunks of text that are indexed for similarity search, whereas entity and document_collections are created during knowledge graph construction. + + This endpoint creates a database index optimized for efficient similarity search over vector embeddings. + It supports two main indexing methods: + + 1. HNSW (Hierarchical Navigable Small World): + - Best for: High-dimensional vectors requiring fast approximate nearest neighbor search + - Pros: Very fast search, good recall, memory-resident for speed + - Cons: Slower index construction, more memory usage + - Key parameters: + * m: Number of connections per layer (higher = better recall but more memory) + * ef_construction: Build-time search width (higher = better recall but slower build) + * ef: Query-time search width (higher = better recall but slower search) + + 2. 
IVF-Flat (Inverted File with Flat Storage): + - Best for: Balance between build speed, search speed, and recall + - Pros: Faster index construction, less memory usage + - Cons: Slightly slower search than HNSW + - Key parameters: + * lists: Number of clusters (usually sqrt(n) where n is number of vectors) + * probe: Number of nearest clusters to search + + Supported similarity measures: + - cosine_distance: Best for comparing semantic similarity + - l2_distance: Best for comparing absolute distances + - ip_distance: Best for comparing raw dot products + + Notes: + - Index creation can be resource-intensive for large datasets + - Use run_with_orchestration=True for large indices to prevent timeouts + - The 'concurrently' option allows other operations while building + - Index names must be unique per table + """ + # TODO: Implement index creation logic + logger.info( + f"Creating vector index for {config.table_name} with method {config.index_method}, measure {config.index_measure}, concurrently {config.concurrently}" + ) + + raw_message = await self.orchestration_provider.run_workflow( + "create-vector-index", + { + "request": { + "table_name": config.table_name, + "index_method": config.index_method, + "index_measure": config.index_measure, + "index_name": config.index_name, + "index_column": config.index_column, + "index_arguments": config.index_arguments, + "concurrently": config.concurrently, + }, + }, + options={ + "additional_metadata": {}, + }, + ) + + return GenericMessageResponse(message=raw_message) # type: ignore + + @self.router.get( + "/indices", + summary="List Vector Indices", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + + # List all indices + indices = client.indices.list( + offset=0, + limit=10, + filters={"table_name": "vectors"} + ) + + # Print index details + for idx in indices: + print(f"Index: {idx['name']}") + print(f"Method: {idx['method']}") + print(f"Size: {idx['size_bytes'] / 1024 / 1024:.2f} MB") + print(f"Row count: {idx['row_count']}") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.indicies.list({ + offset: 0, + limit: 10, + filters: { table_name: "vectors" } + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r indices list + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/indices?offset=0&limit=10" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -H "Content-Type: application/json" + + # With filters + curl -X GET "https://api.example.com/indices?offset=0&limit=10&filters={\"table_name\":\"vectors\"}" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -H "Content-Type: application/json" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def list_indices( + filters: list[str] = Query([]), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedListVectorIndicesResponse: + """ + List existing vector similarity search indices with pagination support. 
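# Illustrative sketch of the IVF-Flat sizing rule described above: choose
# "lists" near sqrt(n) and probe a small fraction of the clusters. The exact
# fractions here are assumptions made for the example, not R2R defaults; the
# config keys match those used in the create-index samples above.
import math

def suggest_ivf_flat_config(row_count: int, index_name: str) -> dict:
    lists = max(1, int(math.sqrt(row_count)))
    probe = max(1, lists // 10)  # search roughly 10% of clusters as a starting point
    return {
        "table_name": "vectors",
        "index_method": "ivf_flat",
        "index_measure": "cosine_distance",
        "index_arguments": {"lists": lists, "probe": probe},
        "index_name": index_name,
        "index_column": "embedding",
        "concurrently": True,
    }

# For one million vectors this suggests lists=1000 and probe=100.
config = suggest_ivf_flat_config(1_000_000, "my_ivf_embeddings_idx")
print(config)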
+ + Returns details about each index including: + - Name and table name + - Indexing method and parameters + - Size and row count + - Creation timestamp and last updated + - Performance statistics (if available) + + The response can be filtered using the filter_by parameter to narrow down results + based on table name, index method, or other attributes. + """ + # TODO: Implement index listing logic + indices = await self.providers.database.list_indices( + offset=offset, limit=limit, filters=filters + ) + return {"indices": indices["indices"]}, indices["page_info"] # type: ignore + + @self.router.get( + "/indices/{table_name}/{index_name}", + summary="Get Vector Index Details", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + + # Get detailed information about a specific index + index = client.indices.retrieve("index_1") + + # Access index details + print(f"Index Method: {index['method']}") + print(f"Parameters: {index['parameters']}") + print(f"Performance Stats: {index['stats']}") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.indicies.retrieve({ + indexName: "index_1", + tableName: "vectors" + }); + + console.log(response); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r indices retrieve index_1 vectors + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/indices/vectors/index_1" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_index( + table_name: VectorTableName = Path( + ..., + description="The table of vector embeddings to delete (e.g. `vectors`, `entity`, `document_collections`)", + ), + index_name: str = Path( + ..., description="The name of the index to delete" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> dict: # -> WrappedGetIndexResponse: + """ + Get detailed information about a specific vector index. 
+ + Returns comprehensive information about the index including: + - Configuration details (method, measure, parameters) + - Current size and row count + - Build progress (if still under construction) + - Performance statistics: + * Average query time + * Memory usage + * Cache hit rates + * Recent query patterns + - Maintenance information: + * Last vacuum + * Fragmentation level + * Recommended optimizations + """ + # TODO: Implement get index logic + indices = await self.providers.database.list_indices( + filters={"index_name": index_name, "table_name": table_name} + ) + if len(indices["indices"]) != 1: + raise R2RException( + f"Index '{index_name}' not found", status_code=404 + ) + return {"index": indices["indices"][0]} + + # TODO - Implement update index + # @self.router.post( + # "/indices/{name}", + # summary="Update Vector Index", + # openapi_extra={ + # "x-codeSamples": [ + # { + # "lang": "Python", + # "source": """ + # from r2r import R2RClient + + # client = R2RClient("http://localhost:7272") + + # # Update HNSW index parameters + # result = client.indices.update( + # "550e8400-e29b-41d4-a716-446655440000", + # config={ + # "index_arguments": { + # "ef": 80, # Increase search quality + # "m": 24 # Increase connections per layer + # }, + # "concurrently": True + # }, + # run_with_orchestration=True + # )""", + # }, + # { + # "lang": "Shell", + # "source": """ + # curl -X PUT "https://api.example.com/indices/550e8400-e29b-41d4-a716-446655440000" \\ + # -H "Content-Type: application/json" \\ + # -H "Authorization: Bearer YOUR_API_KEY" \\ + # -d '{ + # "config": { + # "index_arguments": { + # "ef": 80, + # "m": 24 + # }, + # "concurrently": true + # }, + # "run_with_orchestration": true + # }'""", + # }, + # ] + # }, + # ) + # @self.base_endpoint + # async def update_index( + # id: UUID = Path(...), + # config: IndexConfig = Body(...), + # run_with_orchestration: Optional[bool] = Body(True), + # auth_user=Depends(self.providers.auth.auth_wrapper), + # ): # -> WrappedUpdateIndexResponse: + # """ + # Update an existing index's configuration. + # """ + # # TODO: Implement index update logic + # pass + + @self.router.delete( + "/indices/{table_name}/{index_name}", + summary="Delete Vector Index", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + + # Delete an index with orchestration for cleanup + result = client.indices.delete( + index_name="index_1", + table_name="vectors", + run_with_orchestration=True + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.indicies.delete({ + indexName: "index_1" + tableName: "vectors" + }); + + console.log(response); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r indices delete index_1 vectors + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/indices/index_1" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def delete_index( + table_name: VectorTableName = Path( + default=..., + description="The table of vector embeddings to delete (e.g. 
`vectors`, `entity`, `document_collections`)", + ), + index_name: str = Path( + ..., description="The name of the index to delete" + ), + # concurrently: bool = Body( + # default=True, + # description="Whether to delete the index concurrently (recommended for large indices)", + # ), + # run_with_orchestration: Optional[bool] = Body(True), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedGenericMessageResponse: + """ + Delete an existing vector similarity search index. + + This endpoint removes the specified index from the database. Important considerations: + + - Deletion is permanent and cannot be undone + - Underlying vector data remains intact + - Queries will fall back to sequential scan + - Running queries during deletion may be slower + - Use run_with_orchestration=True for large indices to prevent timeouts + - Consider index dependencies before deletion + + The operation returns immediately but cleanup may continue in background. + """ + logger.info( + f"Deleting vector index {index_name} from table {table_name}" + ) + + raw_message = await self.orchestration_provider.run_workflow( + "delete-vector-index", + { + "request": { + "index_name": index_name, + "table_name": table_name, + "concurrently": True, + }, + }, + options={ + "additional_metadata": {}, + }, + ) + + return GenericMessageResponse(message=raw_message) # type: ignore diff --git a/py/core/main/api/v3/prompts_router.py b/py/core/main/api/v3/prompts_router.py new file mode 100644 index 000000000..5de440f7a --- /dev/null +++ b/py/core/main/api/v3/prompts_router.py @@ -0,0 +1,442 @@ +import textwrap +from typing import Optional + +from fastapi import Body, Depends, Path, Query + +from core.base import R2RException, RunType +from core.base.api.models import ( + GenericBooleanResponse, + GenericMessageResponse, + WrappedBooleanResponse, + WrappedGenericMessageResponse, + WrappedPromptResponse, + WrappedPromptsResponse, +) +from core.providers import ( + HatchetOrchestrationProvider, + SimpleOrchestrationProvider, +) + +from .base_router import BaseRouterV3 + + +class PromptsRouter(BaseRouterV3): + def __init__( + self, + providers, + services, + orchestration_provider: ( + HatchetOrchestrationProvider | SimpleOrchestrationProvider + ), + run_type: RunType = RunType.MANAGEMENT, + ): + super().__init__(providers, services, orchestration_provider, run_type) + + def _setup_routes(self): + @self.router.post( + "/prompts", + summary="Create a new prompt", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + result = client.prompts.create( + name="greeting_prompt", + template="Hello, {name}!", + input_types={"name": "string"} + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.prompts.create({ + name: "greeting_prompt", + template: "Hello, {name}!", + inputTypes: { name: "string" }, + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/prompts" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -H "Content-Type: application/json" \\ + -d '{"name": "greeting_prompt", "template": "Hello, {name}!", "input_types": {"name": "string"}}' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def create_prompt( + name: str = Body(..., description="The name of the prompt"), + template: str = Body( + ..., description="The template string for the prompt" + ), + input_types: dict[str, str] = Body( + default={}, + description="A dictionary mapping input names to their types", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedGenericMessageResponse: + """ + Create a new prompt with the given configuration. + + This endpoint allows superusers to create a new prompt with a specified name, template, and input types. + """ + if not auth_user.is_superuser: + raise R2RException( + "Only a superuser can create prompts.", + 403, + ) + result = await self.services["management"].add_prompt( + name, template, input_types + ) + return GenericMessageResponse(message=result) # type: ignore + + @self.router.get( + "/prompts", + summary="List all prompts", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.prompts.list() + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.prompts.list(); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r prompts list + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/v3/prompts" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_prompts( + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedPromptsResponse: + """ + List all available prompts. + + This endpoint retrieves a list of all prompts in the system. Only superusers can access this endpoint. + """ + if not auth_user.is_superuser: + raise R2RException( + "Only a superuser can list prompts.", + 403, + ) + get_prompts_response = await self.services[ + "management" + ].get_all_prompts() + + return ( # type: ignore + get_prompts_response["results"], + { + "total_entries": get_prompts_response["total_entries"], + }, + ) + + @self.router.post( + "/prompts/{name}", + summary="Get a specific prompt", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + result = client.prompts.get( + "greeting_prompt", + inputs={"name": "John"}, + prompt_override="Hi, {name}!" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.prompts.retrieve({ + name: "greeting_prompt", + inputs: { name: "John" }, + promptOverride: "Hi, {name}!", + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r prompts retrieve greeting_prompt --inputs '{"name": "John"}' --prompt-override "Hi, {name}!" + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/prompts/greeting_prompt?inputs=%7B%22name%22%3A%22John%22%7D&prompt_override=Hi%2C%20%7Bname%7D!" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_prompt( + name: str = Path(..., description="Prompt name"), + inputs: Optional[dict[str, str]] = Body( + None, description="Prompt inputs" + ), + prompt_override: Optional[str] = Query( + None, description="Prompt override" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedPromptResponse: + """ + Get a specific prompt by name, optionally with inputs and override. + + This endpoint retrieves a specific prompt and allows for optional inputs and template override. + Only superusers can access this endpoint. + """ + if not auth_user.is_superuser: + raise R2RException( + "Only a superuser can retrieve prompts.", + 403, + ) + result = await self.services["management"].get_prompt( + name, inputs, prompt_override + ) + return result # type: ignore + + @self.router.put( + "/prompts/{name}", + summary="Update an existing prompt", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.prompts.update( + "greeting_prompt", + template="Greetings, {name}!", + input_types={"name": "string", "age": "integer"} + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.prompts.update({ + name: "greeting_prompt", + template: "Greetings, {name}!", + inputTypes: { name: "string", age: "integer" }, + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X PUT "https://api.example.com/v3/prompts/greeting_prompt" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -H "Content-Type: application/json" \\ + -d '{"template": "Greetings, {name}!", "input_types": {"name": "string", "age": "integer"}}' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def update_prompt( + name: str = Path(..., description="Prompt name"), + template: Optional[str] = Body( + None, description="Updated prompt template" + ), + input_types: dict[str, str] = Body( + default={}, + description="A dictionary mapping input names to their types", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedGenericMessageResponse: + """ + Update an existing prompt's template and/or input types. + + This endpoint allows superusers to update the template and input types of an existing prompt. 
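# Illustrative sketch of how a prompt template and its input_types map onto a
# rendered string. The type-name-to-Python-type mapping and the client-side
# validation are assumptions made for this example; the server applies its own
# handling when a prompt is retrieved with inputs.
PY_TYPES = {"string": str, "integer": int, "float": float, "boolean": bool}

def render_prompt(template: str, input_types: dict[str, str], inputs: dict) -> str:
    for key, type_name in input_types.items():
        if key not in inputs:
            raise ValueError(f"Missing input: {key}")
        expected = PY_TYPES.get(type_name, str)
        if not isinstance(inputs[key], expected):
            raise TypeError(f"Input {key!r} should be {type_name}")
    return template.format(**inputs)

# Matches the greeting_prompt example used in the code samples above.
print(render_prompt("Greetings, {name}!", {"name": "string"}, {"name": "John"}))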
+ """ + if not auth_user.is_superuser: + raise R2RException( + "Only a superuser can update prompts.", + 403, + ) + result = await self.services["management"].update_prompt( + name, template, input_types + ) + return GenericMessageResponse(message=result) # type: ignore + + @self.router.delete( + "/prompts/{name}", + summary="Delete a prompt", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.prompts.delete("greeting_prompt") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.prompts.delete({ + name: "greeting_prompt", + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r prompts delete greeting_prompt + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/v3/prompts/greeting_prompt" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def delete_prompt( + name: str = Path(..., description="Prompt name"), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Delete a prompt by name. + + This endpoint allows superusers to delete an existing prompt. + """ + if not auth_user.is_superuser: + raise R2RException( + "Only a superuser can delete prompts.", + 403, + ) + await self.services["management"].delete_prompt(name) + return GenericBooleanResponse(success=True) # type: ignore diff --git a/py/core/main/api/v3/retrieval_router.py b/py/core/main/api/v3/retrieval_router.py new file mode 100644 index 000000000..ff89373bb --- /dev/null +++ b/py/core/main/api/v3/retrieval_router.py @@ -0,0 +1,802 @@ +import asyncio +import textwrap +from copy import copy +from typing import Any, Optional +from uuid import UUID + +from fastapi import Body, Depends +from fastapi.responses import StreamingResponse + +from core.base import ( + GenerationConfig, + GraphSearchSettings, + Message, + R2RException, + SearchSettings, +) +from core.base.api.models import ( + WrappedAgentResponse, + WrappedCompletionResponse, + WrappedRAGResponse, + WrappedSearchResponse, +) +from core.base.logger.base import RunType +from core.providers import ( + HatchetOrchestrationProvider, + SimpleOrchestrationProvider, +) + +from .base_router import BaseRouterV3 + + +class RetrievalRouterV3(BaseRouterV3): + def __init__( + self, + providers, + services, + orchestration_provider: ( + HatchetOrchestrationProvider | SimpleOrchestrationProvider + ), + run_type: RunType = RunType.RETRIEVAL, + ): + super().__init__(providers, services, orchestration_provider, run_type) + + def _register_workflows(self): + pass + + def _select_filters( + self, + auth_user: Any, + search_settings: SearchSettings, + ) -> dict[str, Any]: + + filters = copy(search_settings.filters) + selected_collections = None + if not auth_user.is_superuser: + user_collections = set(auth_user.collection_ids) + for key in filters.keys(): + if "collection_ids" in key: + selected_collections = set(filters[key]["$overlap"]) + break + + if selected_collections: + allowed_collections = user_collections.intersection( + selected_collections + ) + else: + allowed_collections = user_collections + # for non-superusers, we filter by 
user_id and selected & allowed collections
+            collection_filters = {
+                "$or": [
+                    {"user_id": {"$eq": auth_user.id}},
+                    {
+                        "collection_ids": {
+                            "$overlap": list(allowed_collections)
+                        }
+                    },
+                ]  # type: ignore
+            }
+
+            filters.pop("collection_ids", None)
+
+            filters = {"$and": [collection_filters, filters]}  # type: ignore
+
+        return filters
+
+    def _setup_routes(self):
+
+        @self.router.post(
+            "/retrieval/search",
+            summary="Search R2R",
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.retrieval.search(
+                                query="Who is Aristotle?",
+                                search_settings={
+                                    "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
+                                    "use_semantic_search": True,
+                                    "chunk_settings": {
+                                        "limit": 20,  # separate limit for chunk vs. graph
+                                        "enabled": True,
+                                    },
+                                    "graph_settings": {
+                                        "enabled": True,
+                                    },
+                                },
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                const response = await client.search({
+                                    query: "Who is Aristotle?",
+                                    search_settings: {
+                                        filters: {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
+                                        use_semantic_search: true,
+                                        chunk_settings: {
+                                            limit: 20, // separate limit for chunk vs. graph
+                                            enabled: true
+                                        },
+                                        graph_settings: {
+                                            enabled: true,
+                                        }
+                                    }
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            r2r retrieval search --query "Who is Aristotle?"
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "Shell",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "https://api.example.com/retrieval/search" \\
+                                -H "Content-Type: application/json" \\
+                                -H "Authorization: Bearer YOUR_API_KEY" \\
+                                -d '{
+                                    "query": "Who is Aristotle?",
+                                    "search_settings": {
+                                        "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}},
+                                        "use_semantic_search": true,
+                                        "chunk_settings": {
+                                            "limit": 20,
+                                            "enabled": true
+                                        },
+                                        "graph_settings": {
+                                            "enabled": true
+                                        }
+                                    }
+                                }'
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def search_app(
+            query: str = Body(
+                ...,
+                description="Search query to find relevant documents",
+            ),
+            search_settings: SearchSettings = Body(
+                default_factory=SearchSettings,
+                description="Settings for vector-based search",
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper),
+        ) -> WrappedSearchResponse:
+            """
+            Perform a search query against the vector database, knowledge graph, and any other configured search engines.
+
+            This endpoint allows for complex filtering of search results using PostgreSQL-based queries.
+            Filters can be applied to various fields, such as `document_id` and internal metadata values.
+
+            Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.
+            """
+            search_settings.filters = self._select_filters(
+                auth_user, search_settings
+            )
+
+            results = await self.services["retrieval"].search(
+                query=query,
+                search_settings=search_settings,
+            )
+            return results
+
+        @self.router.post(
+            "/retrieval/rag",
+            summary="RAG Query",
+            response_model=None,
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+ + response =client.retrieval.rag( + query="Who is Aristotle?", + search_settings={ + "use_semantic_search": True, + "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}}, + "limit": 10, + chunk_settings={ + "limit": 20, # separate limit for chunk vs. graph + }, + graph_settings={ + "enabled": True, + }, + }, + rag_generation_config: { + stream: false, + temperature: 0.7, + max_tokens: 150 + } + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.retrieval.rag({ + query: "Who is Aristotle?", + search_settings: { + filters: {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}}, + use_semantic_search: true, + chunk_settings: { + limit: 20, // separate limit for chunk vs. graph + enabled: true + }, + graph_settings: { + enabled: true, + }, + }, + rag_generation_config: { + stream: false, + temperature: 0.7, + max_tokens: 150 + } + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r retrieval search --query "Who is Aristotle?" --stream + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/retrieval/rag" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -d '{ + "query": "Who is Aristotle?", + "search_settings": { + "use_semantic_search": True, + "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}}, + "limit": 10, + chunk_settings={ + "limit": 20, # separate limit for chunk vs. graph + }, + graph_settings={ + "enabled": True, + }, + }, + "rag_generation_config": { + stream: false, + temperature: 0.7, + max_tokens: 150 + } + }' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def rag_app( + query: str = Body(...), + search_settings: SearchSettings = Body( + default_factory=SearchSettings, + description="Settings for vector-based search", + ), + rag_generation_config: GenerationConfig = Body( + default_factory=GenerationConfig, + description="Configuration for RAG generation", + ), + task_prompt_override: Optional[str] = Body( + default=None, + description="Optional custom prompt to override default", + ), + include_title_if_available: bool = Body( + default=False, + description="Include document titles in responses when available", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedRAGResponse: + """ + Execute a RAG (Retrieval-Augmented Generation) query. + + This endpoint combines search results with language model generation. + It supports the same filtering capabilities as the search endpoint, + allowing for precise control over the retrieved context. + + The generation process can be customized using the `rag_generation_config` parameter. 
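+
+            When `rag_generation_config.stream` is set to `True`, the endpoint returns a
+            streaming response that yields generation chunks as they are produced; otherwise
+            a single `WrappedRAGResponse` is returned.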
+ """ + + search_settings.filters = self._select_filters( + auth_user, search_settings + ) + + response = await self.services["retrieval"].rag( + query=query, + search_settings=search_settings, + rag_generation_config=rag_generation_config, + task_prompt_override=task_prompt_override, + include_title_if_available=include_title_if_available, + ) + + if rag_generation_config.stream: + + async def stream_generator(): + async for chunk in response: + yield chunk + await asyncio.sleep(0) + + return StreamingResponse( + stream_generator(), media_type="application/json" + ) # type: ignore + else: + return response + + @self.router.post( + "/retrieval/agent", + summary="RAG-powered Conversational Agent", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response =client.retrieval.agent( + message={ + "role": "user", + "content": "What were the key contributions of Aristotle to logic and how did they influence later philosophers?" + }, + search_settings={ + "use_semantic_search": True, + "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}}, + "limit": 10, + chunk_settings={ + "limit": 20, # separate limit for chunk vs. graph + }, + graph_settings={ + "enabled": True, + }, + }, + rag_generation_config: { + stream: false, + temperature: 0.7, + max_tokens: 150 + } + include_title_if_available=True, + conversation_id="550e8400-e29b-41d4-a716-446655440000" # Optional for conversation continuity + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.retrieval.agent({ + message: { + role: "user", + content: "What were the key contributions of Aristotle to logic and how did they influence later philosophers?" + }, + search_settings: { + filters: {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}}, + use_semantic_search: true, + chunk_settings: { + limit: 20, // separate limit for chunk vs. graph + enabled: true + }, + graph_settings: { + enabled: true, + }, + }, + rag_generation_config: { + stream: false, + temperature: 0.7, + max_tokens: 150 + }, + includeTitleIfAvailable: true, + conversationId: "550e8400-e29b-41d4-a716-446655440000" + }); + } + + main(); + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/retrieval/agent" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -d '{ + "message": { + "role": "user", + "content": "What were the key contributions of Aristotle to logic and how did they influence later philosophers?" + }, + "search_settings": { + "use_semantic_search": True, + "filters": {"document_id": {"$eq": "3e157b3a-8469-51db-90d9-52e7d896b49b"}}, + "limit": 10, + chunk_settings={ + "limit": 20, # separate limit for chunk vs. 
graph + }, + graph_settings={ + "enabled": True, + }, + }, + "include_title_if_available": true, + "conversation_id": "550e8400-e29b-41d4-a716-446655440000" + }' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def agent_app( + message: Optional[Message] = Body( + None, + description="Current message to process", + ), + messages: Optional[list[Message]] = Body( + None, + deprecated=True, + description="List of messages (deprecated, use message instead)", + ), + search_settings: SearchSettings = Body( + default_factory=SearchSettings, + description="Settings for vector-based search", + ), + rag_generation_config: GenerationConfig = Body( + default_factory=GenerationConfig, + description="Configuration for RAG generation", + ), + task_prompt_override: Optional[str] = Body( + default=None, + description="Optional custom prompt to override default", + ), + include_title_if_available: bool = Body( + default=True, + description="Include document titles in responses when available", + ), + conversation_id: Optional[UUID] = Body( + default=None, + description="ID of the conversation", + ), + branch_id: Optional[UUID] = Body( + default=None, + description="ID of the conversation branch", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedAgentResponse: + """ + Engage with an intelligent RAG-powered conversational agent for complex information retrieval and analysis. + + This advanced endpoint combines retrieval-augmented generation (RAG) with a conversational AI agent to provide + detailed, context-aware responses based on your document collection. The agent can: + + - Maintain conversation context across multiple interactions + - Dynamically search and retrieve relevant information from both vector and knowledge graph sources + - Break down complex queries into sub-questions for comprehensive answers + - Cite sources and provide evidence-based responses + - Handle follow-up questions and clarifications + - Navigate complex topics with multi-step reasoning + + Key Features: + - Hybrid search combining vector and knowledge graph approaches + - Contextual conversation management with conversation_id tracking + - Customizable generation parameters for response style and length + - Source document citation with optional title inclusion + - Streaming support for real-time responses + - Branch management for exploring different conversation paths + + Common Use Cases: + - Research assistance and literature review + - Document analysis and summarization + - Technical support and troubleshooting + - Educational Q&A and tutoring + - Knowledge base exploration + + The agent uses both vector search and knowledge graph capabilities to find and synthesize + information, providing detailed, factual responses with proper attribution to source documents. 
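+
+            To continue an existing conversation, pass the `conversation_id` returned by a
+            previous call; `branch_id` selects a specific branch within that conversation.
+            As with the RAG endpoint, setting `rag_generation_config.stream` to `True`
+            streams the agent's output incrementally.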
+ """ + + search_settings.filters = self._select_filters( + auth_user=auth_user, + search_settings=search_settings, + ) + + try: + response = await self.services["retrieval"].agent( + message=message, + messages=messages, + search_settings=search_settings, + rag_generation_config=rag_generation_config, + task_prompt_override=task_prompt_override, + include_title_if_available=include_title_if_available, + conversation_id=( + str(conversation_id) if conversation_id else None + ), + branch_id=str(branch_id) if branch_id else None, + ) + + if rag_generation_config.stream: + + async def stream_generator(): + async for chunk in response: + yield chunk + await asyncio.sleep(0) + + return StreamingResponse( + stream_generator(), media_type="application/json" + ) # type: ignore + else: + return response + except Exception as e: + raise R2RException(str(e), 500) + + @self.router.post( + "/retrieval/completion", + summary="Generate Message Completions", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + response =client.completion( + messages=[ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the capital of France?"}, + {"role": "assistant", "content": "The capital of France is Paris."}, + {"role": "user", "content": "What about Italy?"} + ], + generation_config={ + "model": "gpt-4o-mini", + "temperature": 0.7, + "max_tokens": 150, + "stream": False + } + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.completion({ + messages: [ + { role: "system", content: "You are a helpful assistant." }, + { role: "user", content: "What is the capital of France?" }, + { role: "assistant", content: "The capital of France is Paris." }, + { role: "user", content: "What about Italy?" 
} + ], + generationConfig: { + model: "gpt-4o-mini", + temperature: 0.7, + maxTokens: 150, + stream: false + } + }); + } + + main(); + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/retrieval/completion" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -d '{ + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the capital of France?"}, + {"role": "assistant", "content": "The capital of France is Paris."}, + {"role": "user", "content": "What about Italy?"} + ], + "generation_config": { + "model": "gpt-4o-mini", + "temperature": 0.7, + "max_tokens": 150, + "stream": false + } + }' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def completion( + messages: list[Message] = Body( + ..., + description="List of messages to generate completion for", + example=[ + { + "role": "system", + "content": "You are a helpful assistant.", + }, + { + "role": "user", + "content": "What is the capital of France?", + }, + { + "role": "assistant", + "content": "The capital of France is Paris.", + }, + {"role": "user", "content": "What about Italy?"}, + ], + ), + generation_config: GenerationConfig = Body( + default_factory=GenerationConfig, + description="Configuration for text generation", + example={ + "model": "gpt-4o-mini", + "temperature": 0.7, + "max_tokens": 150, + "stream": False, + }, + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + response_model=WrappedCompletionResponse, + ): + """ + Generate completions for a list of messages. + + This endpoint uses the language model to generate completions for the provided messages. + The generation process can be customized using the generation_config parameter. + + The messages list should contain alternating user and assistant messages, with an optional + system message at the start. Each message should have a 'role' and 'content'. + """ + + return await self.services["retrieval"].completion( + messages=messages, + generation_config=generation_config, + ) + + @self.router.post( + "/retrieval/embedding", + summary="Generate Embeddings", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.retrieval.embedding( + text="Who is Aristotle?", + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.retrieval.embedding({ + text: "Who is Aristotle?", + }); + } + + main(); + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/retrieval/embedding" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -d '{ + "text": "Who is Aristotle?", + }' + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def embedding( + text: str = Body( + ..., + description="Text to generate embeddings for", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ): + """ + Generate embeddings for the provided text using the specified model. + + This endpoint uses the language model to generate embeddings for the provided text. + The model parameter specifies the model to use for generating embeddings. 
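+
+            Note that the request body currently accepts only a `text` field; the embedding
+            model is determined by the server's configuration rather than per request.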
+ """ + + return await self.services["retrieval"].embedding( + text=text, + ) diff --git a/py/core/main/api/v3/system_router.py b/py/core/main/api/v3/system_router.py new file mode 100644 index 000000000..cf18f7654 --- /dev/null +++ b/py/core/main/api/v3/system_router.py @@ -0,0 +1,313 @@ +import textwrap +from datetime import datetime, timezone +from typing import Optional + +import psutil +from fastapi import Depends, Query + +from core.base import R2RException, RunType +from core.base.api.models import ( + GenericMessageResponse, + WrappedGenericMessageResponse, + WrappedLogsResponse, + WrappedServerStatsResponse, + WrappedSettingsResponse, +) +from core.providers import ( + HatchetOrchestrationProvider, + SimpleOrchestrationProvider, +) + +from .base_router import BaseRouterV3 + + +class SystemRouter(BaseRouterV3): + def __init__( + self, + providers, + services, + orchestration_provider: ( + HatchetOrchestrationProvider | SimpleOrchestrationProvider + ), + run_type: RunType = RunType.MANAGEMENT, + ): + super().__init__(providers, services, orchestration_provider, run_type) + self.start_time = datetime.now(timezone.utc) + + def _setup_routes(self): + @self.router.get( + "/health", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.system.health() + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.system.health(); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r health + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/health"\\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def health_check() -> WrappedGenericMessageResponse: + return GenericMessageResponse(message="ok") # type: ignore + + @self.router.get( + "/system/settings", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) 
+ + result = client.system.settings() + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.system.settings(); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r system settings + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/system/settings" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def app_settings( + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedSettingsResponse: + if not auth_user.is_superuser: + raise R2RException( + "Only a superuser can call the `system/settings` endpoint.", + 403, + ) + return await self.services["management"].app_settings() + + @self.router.get( + "/system/status", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.system.status() + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.system.status(); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r system status + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/system/status" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def server_stats( + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedServerStatsResponse: + if not auth_user.is_superuser: + raise R2RException( + "Only an authorized user can call the `system/status` endpoint.", + 403, + ) + return { # type: ignore + "start_time": self.start_time.isoformat(), + "uptime_seconds": ( + datetime.now(timezone.utc) - self.start_time + ).total_seconds(), + "cpu_usage": psutil.cpu_percent(), + "memory_usage": psutil.virtual_memory().percent, + } + + @self.router.get( + "/system/logs", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # when using auth, do client.login(...) + + result = client.system.logs() + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.system.logs({}); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r system logs + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/system/logs" \\ + -H "Content-Type: application/json" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def logs( + run_type_filter: Optional[str] = Query(""), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. 
Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedLogsResponse: + if not auth_user.is_superuser: + raise R2RException( + "Only a superuser can call the `system/logs` endpoint.", + 403, + ) + + return await self.services["management"].logs( + run_type_filter=run_type_filter, + offset=offset, + limit=limit, + ) diff --git a/py/core/main/api/v3/users_router.py b/py/core/main/api/v3/users_router.py new file mode 100644 index 000000000..7da80c855 --- /dev/null +++ b/py/core/main/api/v3/users_router.py @@ -0,0 +1,1220 @@ +import textwrap +from typing import Optional +from uuid import UUID + +from fastapi import Body, Depends, Path, Query +from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm +from pydantic import EmailStr + +from core.base import R2RException +from core.base.api.models import ( + GenericBooleanResponse, + GenericMessageResponse, + WrappedBooleanResponse, + WrappedCollectionsResponse, + WrappedGenericMessageResponse, + WrappedTokenResponse, + WrappedUserResponse, + WrappedUsersResponse, +) + +from .base_router import BaseRouterV3 + +oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") + + +class UsersRouter(BaseRouterV3): + def __init__( + self, providers, services, orchestration_provider=None, run_type=None + ): + super().__init__(providers, services, orchestration_provider, run_type) + + def _setup_routes(self): + + # New authentication routes + @self.router.post( + "/users/register", + response_model=WrappedUserResponse, + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + new_user = client.users.register( + email="jane.doe@example.com", + password="secure_password123" + )""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.register({ + email: "jane.doe@example.com", + password: "secure_password123" + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r users register jane.doe@example.com secure_password123 + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/users/register" \\ + -H "Content-Type: application/json" \\ + -d '{ + "email": "jane.doe@example.com", + "password": "secure_password123" + }'""" + ), + }, + ] + }, + ) + @self.base_endpoint + async def register( + email: EmailStr = Body(..., description="User's email address"), + password: str = Body(..., description="User's password"), + ): + """Register a new user with the given email and password.""" + return await self.services["auth"].register(email, password) + + @self.router.post( + "/users/verify-email", + response_model=WrappedGenericMessageResponse, + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + tokens = client.users.verify_email( + email="jane.doe@example.com", + verification_code="1lklwal!awdclm" + )""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new 
r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.verifyEmail({ + email: jane.doe@example.com", + verificationCode: "1lklwal!awdclm" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/users/login" \\ + -H "Content-Type: application/x-www-form-urlencoded" \\ + -d "email=jane.doe@example.com&verification_code=1lklwal!awdclm" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def verify_email( + email: EmailStr = Body(..., description="User's email address"), + verification_code: str = Body( + ..., description="Email verification code" + ), + ) -> WrappedGenericMessageResponse: + """Verify a user's email address.""" + result = await self.services["auth"].verify_email( + email, verification_code + ) + return GenericMessageResponse(message=result["message"]) # type: ignore + + @self.router.post( + "/users/login", + response_model=WrappedTokenResponse, + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + tokens = client.users.login( + email="jane.doe@example.com", + password="secure_password123" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.login({ + email: jane.doe@example.com", + password: "secure_password123" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/users/login" \\ + -H "Content-Type: application/x-www-form-urlencoded" \\ + -d "username=jane.doe@example.com&password=secure_password123" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def login(form_data: OAuth2PasswordRequestForm = Depends()): + """Authenticate a user and provide access tokens.""" + return await self.services["auth"].login( + form_data.username, form_data.password + ) + + @self.router.post( + "/users/logout", + response_model=WrappedGenericMessageResponse, + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) + result = client.users.logout() + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.logout(); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/users/logout" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def logout( + token: str = Depends(oauth2_scheme), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedGenericMessageResponse: + """Log out the current user.""" + result = await self.services["auth"].logout(token) + return GenericMessageResponse(message=result["message"]) # type: ignore + + @self.router.post( + "/users/refresh-token", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) 
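+                            # (assumes the client kept the refresh token from the login above)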
+ + new_tokens = client.users.refresh_token() + # New tokens are automatically stored in the client""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.refreshAccessToken(); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/users/refresh-token" \\ + -H "Content-Type: application/json" \\ + -d '{ + "refresh_token": "YOUR_REFRESH_TOKEN" + }'""" + ), + }, + ] + }, + ) + @self.base_endpoint + async def refresh_token( + refresh_token: str = Body(..., description="Refresh token") + ) -> WrappedTokenResponse: + """Refresh the access token using a refresh token.""" + result = await self.services["auth"].refresh_access_token( + refresh_token=refresh_token + ) + return result + + @self.router.post( + "/users/change-password", + response_model=WrappedGenericMessageResponse, + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) + + result = client.users.change_password( + current_password="old_password123", + new_password="new_secure_password456" + )""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.changePassword({ + currentPassword: "old_password123", + newPassword: "new_secure_password456" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/users/change-password" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -H "Content-Type: application/json" \\ + -d '{ + "current_password": "old_password123", + "new_password": "new_secure_password456" + }'""" + ), + }, + ] + }, + ) + @self.base_endpoint + async def change_password( + current_password: str = Body(..., description="Current password"), + new_password: str = Body(..., description="New password"), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> GenericMessageResponse: + """Change the authenticated user's password.""" + result = await self.services["auth"].change_password( + auth_user, current_password, new_password + ) + return GenericMessageResponse(message=result["message"]) # type: ignore + + @self.router.post( + "/users/request-password-reset", + response_model=WrappedGenericMessageResponse, + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + result = client.users.request_password_reset( + email="jane.doe@example.com" + )""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.requestPasswordReset({ + email: jane.doe@example.com", + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/users/request-password-reset" \\ + -H "Content-Type: application/json" \\ + -d '{ + "email": "jane.doe@example.com" + }'""" + ), + }, + ] + }, + ) + @self.base_endpoint + async def 
request_password_reset( + email: EmailStr = Body(..., description="User's email address") + ) -> WrappedGenericMessageResponse: + """Request a password reset for a user.""" + result = await self.services["auth"].request_password_reset(email) + return GenericMessageResponse(message=result["message"]) # type: ignore + + @self.router.post( + "/users/reset-password", + response_model=WrappedGenericMessageResponse, + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + result = client.users.reset_password( + reset_token="reset_token_received_via_email", + new_password="new_secure_password789" + )""" + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.resetPassword({ + resestToken: "reset_token_received_via_email", + newPassword: "new_secure_password789" + }); + } + + main(); + """ + ), + }, + { + "lang": "cURL", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/v3/users/reset-password" \\ + -H "Content-Type: application/json" \\ + -d '{ + "reset_token": "reset_token_received_via_email", + "new_password": "new_secure_password789" + }'""" + ), + }, + ] + }, + ) + @self.base_endpoint + async def reset_password( + reset_token: str = Body(..., description="Password reset token"), + new_password: str = Body(..., description="New password"), + ) -> WrappedGenericMessageResponse: + """Reset a user's password using a reset token.""" + result = await self.services["auth"].confirm_password_reset( + reset_token, new_password + ) + return GenericMessageResponse(message=result["message"]) # type: ignore + + @self.router.get( + "/users", + summary="List Users", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) + + # List users with filters + users = client.users.list( + offset=0, + limit=100, + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.list(); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r users list + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/users?offset=0&limit=100&username=john&email=john@example.com&is_active=true&is_superuser=false" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def list_users( + # TODO - Implement the following parameters + # offset: int = Query(0, ge=0, example=0), + # limit: int = Query(100, ge=1, le=1000, example=100), + # username: Optional[str] = Query(None, example="john"), + # email: Optional[str] = Query(None, example="john@example.com"), + # is_active: Optional[bool] = Query(None, example=True), + # is_superuser: Optional[bool] = Query(None, example=False), + # auth_user=Depends(self.providers.auth.auth_wrapper), + ids: list[str] = Query( + [], description="List of user IDs to filter by" + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. 
Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedUsersResponse: + """ + List all users with pagination and filtering options. + Only accessible by superusers. + """ + + if not auth_user.is_superuser: + raise R2RException( + "Only a superuser can call the `users_overview` endpoint.", + 403, + ) + + user_uuids = [UUID(user_id) for user_id in ids] + + users_overview_response = await self.services[ + "management" + ].users_overview(user_ids=user_uuids, offset=offset, limit=limit) + return users_overview_response["results"], { # type: ignore + "total_entries": users_overview_response["total_entries"] + } + + @self.router.get( + "/users/me", + summary="Get the Current User", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) + + # Get user details + users = client.users.me() + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.retrieve(); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r users me + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/users/me" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_current_user( + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedUserResponse: + """ + Get detailed information about the currently authenticated user. + """ + return auth_user + + @self.router.get( + "/users/{id}", + summary="Get User Details", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) + + # Get user details + users = client.users.retrieve( + id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.retrieve({ + id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r users retrieve b4ac4dd6-5f27-596e-a55b-7cf242ca30aa + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/users/550e8400-e29b-41d4-a716-446655440000" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_user( + id: UUID = Path( + ..., example="550e8400-e29b-41d4-a716-446655440000" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedUserResponse: + """ + Get detailed information about a specific user. + Users can only access their own information unless they are superusers. 
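+
+            Requests for another user's record made by a non-superuser are rejected with a 403 error.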
+ """ + if not auth_user.is_superuser and auth_user.id != id: + raise R2RException( + "Only a superuser can call the get `user` endpoint for other users.", + 403, + ) + + users_overview_response = await self.services[ + "management" + ].users_overview( + offset=0, + limit=1, + user_ids=[id], + ) + + return users_overview_response["results"][0] + + @self.router.delete( + "/users/{id}", + summary="Delete User", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) + + # Delete user + client.users.delete(id="550e8400-e29b-41d4-a716-446655440000", password="secure_password123") + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.delete({ + id: "550e8400-e29b-41d4-a716-446655440000", + password: "secure_password123" + }); + } + + main(); + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def delete_user( + id: UUID = Path( + ..., example="550e8400-e29b-41d4-a716-446655440000" + ), + password: Optional[str] = Body( + None, description="User's current password" + ), + delete_vector_data: Optional[bool] = Body( + False, + description="Whether to delete the user's vector data", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Delete a specific user. + Users can only delete their own account unless they are superusers. + """ + if not auth_user.is_superuser and auth_user.id != id: + raise R2RException( + "Only a superuser can delete other users.", + 403, + ) + + await self.services["auth"].delete_user( + user_id=id, + password=password, + delete_vector_data=delete_vector_data, + is_superuser=auth_user.is_superuser, + ) + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.get( + "/users/{id}/collections", + summary="Get User Collections", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) + + # Get user collections + collections = client.user.list_collections( + "550e8400-e29b-41d4-a716-446655440000", + offset=0, + limit=100 + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.listCollections({ + id: "550e8400-e29b-41d4-a716-446655440000", + offset: 0, + limit: 100 + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r users list-collections 550e8400-e29b-41d4-a716-446655440000 + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X GET "https://api.example.com/users/550e8400-e29b-41d4-a716-446655440000/collections?offset=0&limit=100" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def get_user_collections( + id: UUID = Path( + ..., example="550e8400-e29b-41d4-a716-446655440000" + ), + offset: int = Query( + 0, + ge=0, + description="Specifies the number of objects to skip. Defaults to 0.", + ), + limit: int = Query( + 100, + ge=1, + le=1000, + description="Specifies a limit on the number of objects to return, ranging between 1 and 100. 
Defaults to 100.", + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedCollectionsResponse: + """ + Get all collections associated with a specific user. + Users can only access their own collections unless they are superusers. + """ + if auth_user.id != id and not auth_user.is_superuser: + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + user_collection_response = await self.services[ + "management" + ].collections_overview( + offset=offset, + limit=limit, + user_ids=[id], + ) + return user_collection_response["results"], { # type: ignore + "total_entries": user_collection_response["total_entries"] + } + + @self.router.post( + "/users/{id}/collections/{collection_id}", + summary="Add User to Collection", + response_model=WrappedBooleanResponse, + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) + + # Add user to collection + client.users.add_to_collection( + id="550e8400-e29b-41d4-a716-446655440000", + collection_id="750e8400-e29b-41d4-a716-446655440000" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.addToCollection({ + id: "550e8400-e29b-41d4-a716-446655440000", + collectionId: "750e8400-e29b-41d4-a716-446655440000" + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r users add-to-collection 550e8400-e29b-41d4-a716-446655440000 750e8400-e29b-41d4-a716-446655440000 + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/users/550e8400-e29b-41d4-a716-446655440000/collections/750e8400-e29b-41d4-a716-446655440000" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def add_user_to_collection( + id: UUID = Path( + ..., example="550e8400-e29b-41d4-a716-446655440000" + ), + collection_id: UUID = Path( + ..., example="750e8400-e29b-41d4-a716-446655440000" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + if auth_user.id != id and not auth_user.is_superuser: + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + # TODO - Do we need a check on user access to the collection? + await self.services["management"].add_user_to_collection( # type: ignore + id, collection_id + ) + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.delete( + "/users/{id}/collections/{collection_id}", + summary="Remove User from Collection", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) 
+ + # Remove user from collection + client.users.remove_from_collection( + id="550e8400-e29b-41d4-a716-446655440000", + collection_id="750e8400-e29b-41d4-a716-446655440000" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.removeFromCollection({ + id: "550e8400-e29b-41d4-a716-446655440000", + collectionId: "750e8400-e29b-41d4-a716-446655440000" + }); + } + + main(); + """ + ), + }, + { + "lang": "CLI", + "source": textwrap.dedent( + """ + r2r users remove-from-collection 550e8400-e29b-41d4-a716-446655440000 750e8400-e29b-41d4-a716-446655440000 + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X DELETE "https://api.example.com/users/550e8400-e29b-41d4-a716-446655440000/collections/750e8400-e29b-41d4-a716-446655440000" \\ + -H "Authorization: Bearer YOUR_API_KEY" + """ + ), + }, + ] + }, + ) + @self.base_endpoint + async def remove_user_from_collection( + id: UUID = Path( + ..., example="550e8400-e29b-41d4-a716-446655440000" + ), + collection_id: UUID = Path( + ..., example="750e8400-e29b-41d4-a716-446655440000" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedBooleanResponse: + """ + Remove a user from a collection. + Requires either superuser status or access to the collection. + """ + if auth_user.id != id and not auth_user.is_superuser: + raise R2RException( + "The currently authenticated user does not have access to the specified collection.", + 403, + ) + + # TODO - Do we need a check on user access to the collection? + await self.services["management"].remove_user_from_collection( # type: ignore + id, collection_id + ) + return GenericBooleanResponse(success=True) # type: ignore + + @self.router.post( + "/users/{id}", + summary="Update User", + openapi_extra={ + "x-codeSamples": [ + { + "lang": "Python", + "source": textwrap.dedent( + """ + from r2r import R2RClient + + client = R2RClient("http://localhost:7272") + # client.login(...) 
+ + # Update user + updated_user = client.update_user( + "550e8400-e29b-41d4-a716-446655440000", + name="John Doe" + ) + """ + ), + }, + { + "lang": "JavaScript", + "source": textwrap.dedent( + """ + const { r2rClient } = require("r2r-js"); + + const client = new r2rClient("http://localhost:7272"); + + function main() { + const response = await client.users.update({ + id: "550e8400-e29b-41d4-a716-446655440000", + name: "John Doe" + }); + } + + main(); + """ + ), + }, + { + "lang": "Shell", + "source": textwrap.dedent( + """ + curl -X POST "https://api.example.com/users/550e8400-e29b-41d4-a716-446655440000" \\ + -H "Authorization: Bearer YOUR_API_KEY" \\ + -H "Content-Type: application/json" \\ + -d '{ + "id": "550e8400-e29b-41d4-a716-446655440000", + "name": "John Doe", + }' + """ + ), + }, + ] + }, + ) + # TODO - Modify update user to have synced params with user object + @self.base_endpoint + async def update_user( + id: UUID = Path(..., description="ID of the user to update"), + email: EmailStr | None = Body( + None, description="Updated email address" + ), + is_superuser: bool | None = Body( + None, description="Updated superuser status" + ), + name: str | None = Body(None, description="Updated user name"), + bio: str | None = Body(None, description="Updated user bio"), + profile_picture: str | None = Body( + None, description="Updated profile picture URL" + ), + auth_user=Depends(self.providers.auth.auth_wrapper), + ) -> WrappedUserResponse: + """ + Update user information. + Users can only update their own information unless they are superusers. + Superuser status can only be modified by existing superusers. + """ + + if is_superuser is not None and not auth_user.is_superuser: + raise R2RException( + "Only superusers can update the superuser status of a user", + 403, + ) + if not auth_user.is_superuser and auth_user.id != id: + raise R2RException( + "Only superusers can update other users' information", + 403, + ) + + return await self.services["auth"].update_user( + user_id=id, + email=email, + is_superuser=is_superuser, + name=name, + bio=bio, + profile_picture=profile_picture, + ) diff --git a/py/core/main/app.py b/py/core/main/app.py index 5fc6ec16c..c86de9ce3 100644 --- a/py/core/main/app.py +++ b/py/core/main/app.py @@ -1,5 +1,3 @@ -from typing import Union - from fastapi import FastAPI, Request from fastapi.middleware.cors import CORSMiddleware from fastapi.openapi.utils import get_openapi @@ -11,11 +9,17 @@ SimpleOrchestrationProvider, ) -from .api.auth_router import AuthRouter -from .api.ingestion_router import IngestionRouter -from .api.kg_router import KGRouter -from .api.management_router import ManagementRouter -from .api.retrieval_router import RetrievalRouter +from .api.v3.auth_router import AuthRouter +from .api.v3.chunks_router import ChunksRouter +from .api.v3.collections_router import CollectionsRouter +from .api.v3.conversations_router import ConversationsRouter +from .api.v3.documents_router import DocumentsRouter +from .api.v3.graph_router import GraphRouter +from .api.v3.indices_router import IndicesRouter +from .api.v3.prompts_router import PromptsRouter +from .api.v3.retrieval_router import RetrievalRouterV3 +from .api.v3.system_router import SystemRouter +from .api.v3.users_router import UsersRouter from .config import R2RConfig @@ -23,22 +27,35 @@ class R2RApp: def __init__( self, config: R2RConfig, - orchestration_provider: Union[ - HatchetOrchestrationProvider, SimpleOrchestrationProvider - ], + orchestration_provider: ( + HatchetOrchestrationProvider | 
SimpleOrchestrationProvider + ), auth_router: AuthRouter, - ingestion_router: IngestionRouter, - management_router: ManagementRouter, - retrieval_router: RetrievalRouter, - kg_router: KGRouter, + documents_router: DocumentsRouter, + chunks_router: ChunksRouter, + indices_router: IndicesRouter, + users_router: UsersRouter, + collections_router: CollectionsRouter, + conversations_router: ConversationsRouter, + prompts_router: PromptsRouter, + retrieval_router_v3: RetrievalRouterV3, + system_router: SystemRouter, + graph_router: GraphRouter, ): self.config = config - self.ingestion_router = ingestion_router - self.management_router = management_router - self.retrieval_router = retrieval_router self.auth_router = auth_router - self.kg_router = kg_router self.orchestration_provider = orchestration_provider + self.documents_router = documents_router + self.chunks_router = chunks_router + self.indices_router = indices_router + self.users_router = users_router + self.collections_router = collections_router + self.conversations_router = conversations_router + self.prompts_router = prompts_router + self.retrieval_router_v3 = retrieval_router_v3 + self.system_router = system_router + self.graph_router = graph_router + self.app = FastAPI() @self.app.exception_handler(R2RException) @@ -55,14 +72,19 @@ async def r2r_exception_handler(request: Request, exc: R2RException): self._apply_cors() def _setup_routes(self): - # Include routers in the app - self.app.include_router(self.ingestion_router, prefix="/v2") - self.app.include_router(self.management_router, prefix="/v2") - self.app.include_router(self.retrieval_router, prefix="/v2") - self.app.include_router(self.auth_router, prefix="/v2") - self.app.include_router(self.kg_router, prefix="/v2") - @self.app.get("/v2/openapi_spec") + self.app.include_router(self.documents_router, prefix="/v3") + self.app.include_router(self.chunks_router, prefix="/v3") + self.app.include_router(self.indices_router, prefix="/v3") + self.app.include_router(self.users_router, prefix="/v3") + self.app.include_router(self.collections_router, prefix="/v3") + self.app.include_router(self.conversations_router, prefix="/v3") + self.app.include_router(self.prompts_router, prefix="/v3") + self.app.include_router(self.retrieval_router_v3, prefix="/v3") + self.app.include_router(self.graph_router, prefix="/v3") + self.app.include_router(self.system_router, prefix="/v3") + + @self.app.get("/openapi_spec", include_in_schema=False) async def openapi_spec(): return get_openapi( title="R2R Application API", diff --git a/py/core/main/app_entry.py b/py/core/main/app_entry.py index 5328e637a..313f35cd2 100644 --- a/py/core/main/app_entry.py +++ b/py/core/main/app_entry.py @@ -1,6 +1,5 @@ import logging import os -import warnings from contextlib import asynccontextmanager from typing import Optional @@ -50,7 +49,7 @@ async def create_r2r_app( config_name: Optional[str] = "default", config_path: Optional[str] = None, ): - config = R2RConfig.load(config_name, config_path) + config = R2RConfig.load(config_name=config_name, config_path=config_path) if ( config.embedding.provider == "openai" @@ -67,35 +66,13 @@ async def create_r2r_app( logging.basicConfig(level=logging.INFO) -config_name = os.getenv("R2R_CONFIG_NAME", os.getenv("CONFIG_NAME", None)) -config_path = os.getenv("R2R_CONFIG_PATH", os.getenv("CONFIG_PATH", None)) - -# TODO: Remove this check in a future release -# Check if the user is setting deprecated environment variables of CONFIG_NAME and CONFIG_PATH -if os.getenv("CONFIG_NAME"): - 
warnings.warn( - "Environment variable CONFIG_NAME is deprecated and support for it will be removed in release 3.5.0. Please use R2R_CONFIG_NAME instead." - ) -if os.getenv("CONFIG_PATH"): - warnings.warn( - "Environment variable CONFIG_PATH is deprecated and support for it will be removed in release 3.5.0. Please use R2R_CONFIG_PATH instead." - ) +config_name = os.getenv("R2R_CONFIG_NAME", None) +config_path = os.getenv("R2R_CONFIG_PATH", None) if not config_path and not config_name: config_name = "default" host = os.getenv("R2R_HOST", os.getenv("HOST", "0.0.0.0")) -port = int(os.getenv("R2R_PORT", (os.getenv("PORT", "7272")))) - -# TODO: Remove this check in a future release -# Check if the user is setting deprecated environment variables of HOST and PORT -if os.getenv("HOST"): - warnings.warn( - "Environment variable HOST is deprecated and support for it will be removed in release 3.5.0. Please use R2R_HOST instead." - ) -if os.getenv("PORT"): - warnings.warn( - "Environment variable PORT is deprecated and support for it will be removed in release 3.5.0. Please use R2R_PORT instead." - ) +port = int(os.getenv("R2R_PORT", "7272")) logger.info( f"Environment R2R_CONFIG_NAME: {'None' if config_name is None else config_name}" diff --git a/py/core/main/assembly/builder.py b/py/core/main/assembly/builder.py index 9f15578df..6c2b71936 100644 --- a/py/core/main/assembly/builder.py +++ b/py/core/main/assembly/builder.py @@ -1,6 +1,6 @@ import logging from dataclasses import dataclass -from typing import Any, Dict, Optional, Type +from typing import Any, Optional, Type from core.agent import R2RRAGAgent from core.base import ( @@ -17,11 +17,17 @@ from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider from ..abstractions import R2RProviders -from ..api.auth_router import AuthRouter -from ..api.ingestion_router import IngestionRouter -from ..api.kg_router import KGRouter -from ..api.management_router import ManagementRouter -from ..api.retrieval_router import RetrievalRouter +from ..api.v3.auth_router import AuthRouter +from ..api.v3.chunks_router import ChunksRouter +from ..api.v3.collections_router import CollectionsRouter +from ..api.v3.conversations_router import ConversationsRouter +from ..api.v3.documents_router import DocumentsRouter +from ..api.v3.graph_router import GraphRouter +from ..api.v3.indices_router import IndicesRouter +from ..api.v3.prompts_router import PromptsRouter +from ..api.v3.retrieval_router import RetrievalRouterV3 +from ..api.v3.system_router import SystemRouter +from ..api.v3.users_router import UsersRouter from ..app import R2RApp from ..config import R2RConfig from ..services.auth_service import AuthService @@ -176,8 +182,8 @@ def _create_pipelines( ).create_pipelines(*args, **kwargs) def _create_services( - self, service_params: Dict[str, Any] - ) -> Dict[str, Any]: + self, service_params: dict[str, Any] + ) -> dict[str, Any]: services = {} for service_type, override in vars(self.service_overrides).items(): logger.info(f"Creating {service_type} service") @@ -231,22 +237,58 @@ async def build(self, *args, **kwargs) -> R2RApp: routers = { "auth_router": AuthRouter( - services["auth"], orchestration_provider=orchestration_provider + providers=providers, + services=services, + orchestration_provider=orchestration_provider, + ).get_router(), + "documents_router": DocumentsRouter( + providers=providers, + services=services, + orchestration_provider=orchestration_provider, + ).get_router(), + "chunks_router": ChunksRouter( + providers=providers, + 
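The simplified environment handling above (the deprecated CONFIG_NAME / CONFIG_PATH / HOST / PORT fallbacks removed) reduces to a small amount of logic. A standalone sketch of that resolution order, assuming the same defaults:

```python
import os


def resolve_server_settings() -> dict:
    """Mirror the slimmed-down env handling: only the R2R_* variables are consulted."""
    config_name = os.getenv("R2R_CONFIG_NAME")
    config_path = os.getenv("R2R_CONFIG_PATH")
    if not config_path and not config_name:
        config_name = "default"

    return {
        "config_name": config_name,
        "config_path": config_path,
        "host": os.getenv("R2R_HOST", "0.0.0.0"),
        "port": int(os.getenv("R2R_PORT", "7272")),
    }


print(resolve_server_settings())
```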
services=services, + orchestration_provider=orchestration_provider, + ).get_router(), + "indices_router": IndicesRouter( + providers=providers, + services=services, + orchestration_provider=orchestration_provider, + ).get_router(), + "users_router": UsersRouter( + providers=providers, + services=services, + orchestration_provider=orchestration_provider, + ).get_router(), + "collections_router": CollectionsRouter( + providers=providers, + services=services, + orchestration_provider=orchestration_provider, + ).get_router(), + "conversations_router": ConversationsRouter( + providers=providers, + services=services, + orchestration_provider=orchestration_provider, ).get_router(), - "ingestion_router": IngestionRouter( - services["ingestion"], + "prompts_router": PromptsRouter( + providers=providers, + services=services, orchestration_provider=orchestration_provider, ).get_router(), - "management_router": ManagementRouter( - services["management"], + "retrieval_router_v3": RetrievalRouterV3( + providers=providers, + services=services, orchestration_provider=orchestration_provider, ).get_router(), - "retrieval_router": RetrievalRouter( - services["retrieval"], + "system_router": SystemRouter( + providers=providers, + services=services, orchestration_provider=orchestration_provider, ).get_router(), - "kg_router": KGRouter( - services["kg"], + "graph_router": GraphRouter( + providers=providers, + services=services, orchestration_provider=orchestration_provider, ).get_router(), } diff --git a/py/core/main/assembly/factory.py b/py/core/main/assembly/factory.py index df9051dea..3d249cb9c 100644 --- a/py/core/main/assembly/factory.py +++ b/py/core/main/assembly/factory.py @@ -18,9 +18,8 @@ ) from core.pipelines import RAGPipeline, SearchPipeline from core.pipes import GeneratorPipe, MultiSearchPipe, SearchPipe -from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider - from core.providers.email.sendgrid import SendGridEmailProvider +from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders from ..config import R2RConfig @@ -376,7 +375,7 @@ def create_pipes( self, parsing_pipe_override: Optional[AsyncPipe] = None, embedding_pipe_override: Optional[AsyncPipe] = None, - kg_triples_extraction_pipe_override: Optional[AsyncPipe] = None, + kg_relationships_extraction_pipe_override: Optional[AsyncPipe] = None, kg_storage_pipe_override: Optional[AsyncPipe] = None, kg_search_pipe_override: Optional[AsyncPipe] = None, vector_storage_pipe_override: Optional[AsyncPipe] = None, @@ -401,8 +400,8 @@ def create_pipes( ), embedding_pipe=embedding_pipe_override or self.create_embedding_pipe(*args, **kwargs), - kg_triples_extraction_pipe=kg_triples_extraction_pipe_override - or self.create_kg_triples_extraction_pipe(*args, **kwargs), + kg_relationships_extraction_pipe=kg_relationships_extraction_pipe_override + or self.create_kg_relationships_extraction_pipe(*args, **kwargs), kg_storage_pipe=kg_storage_pipe_override or self.create_kg_storage_pipe(*args, **kwargs), vector_storage_pipe=vector_storage_pipe_override @@ -547,14 +546,16 @@ def create_vector_search_pipe(self, *args, **kwargs) -> Any: config=AsyncPipe.PipeConfig(name="routing_search_pipe"), ) - def create_kg_triples_extraction_pipe(self, *args, **kwargs) -> Any: - from core.pipes import KGTriplesExtractionPipe + def create_kg_relationships_extraction_pipe(self, *args, **kwargs) -> Any: + from core.pipes import KGExtractionPipe - return 
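All of the v3 routers above are now constructed the same way, with keyword arguments for `providers`, `services`, and the orchestration provider, and expose their underlying `APIRouter` through `get_router()`. A hypothetical base-class sketch of that shape, purely illustrative and not the project's actual class:

```python
from fastapi import APIRouter


class BaseRouterV3:
    """Hypothetical illustration of the shared v3 router construction pattern."""

    def __init__(self, providers, services, orchestration_provider):
        self.providers = providers
        self.services = services
        self.orchestration_provider = orchestration_provider
        self.router = APIRouter()
        self._setup_routes()

    def _setup_routes(self) -> None:
        # Subclasses register their endpoints on self.router here.
        raise NotImplementedError

    def get_router(self) -> APIRouter:
        return self.router
```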
KGTriplesExtractionPipe( + return KGExtractionPipe( logging_provider=self.providers.logging, llm_provider=self.providers.llm, database_provider=self.providers.database, - config=AsyncPipe.PipeConfig(name="kg_triples_extraction_pipe"), + config=AsyncPipe.PipeConfig( + name="kg_relationships_extraction_pipe" + ), ) def create_kg_storage_pipe(self, *args, **kwargs) -> Any: diff --git a/py/core/main/config.py b/py/core/main/config.py index 4b914d6da..75e1477f4 100644 --- a/py/core/main/config.py +++ b/py/core/main/config.py @@ -194,22 +194,14 @@ def load( config_path: Optional[str] = None, ) -> "R2RConfig": if config_path and config_name: - raise ValueError("Cannot specify both config_path and config_name") - - # TODO: Remove CONFIG_PATH and CONFIG_NAME in a future release - if ( - config_path := os.getenv("R2R_CONFIG_PATH") - or os.getenv("CONFIG_PATH") - or config_path - ): + raise ValueError( + f"Cannot specify both config_path and config_name. Got: {config_path}, {config_name}" + ) + + if config_path := os.getenv("R2R_CONFIG_PATH") or config_path: return cls.from_toml(config_path) - config_name = ( - os.getenv("R2R_CONFIG_NAME") - or os.getenv("CONFIG_NAME") - or config_name - or "default" - ) + config_name = os.getenv("R2R_CONFIG_NAME") or config_name or "default" if config_name not in R2RConfig.CONFIG_OPTIONS: raise ValueError(f"Invalid config name: {config_name}") return cls.from_toml(R2RConfig.CONFIG_OPTIONS[config_name]) diff --git a/py/core/main/orchestration/hatchet/ingestion_workflow.py b/py/core/main/orchestration/hatchet/ingestion_workflow.py index e30220db2..22f4c8485 100644 --- a/py/core/main/orchestration/hatchet/ingestion_workflow.py +++ b/py/core/main/orchestration/hatchet/ingestion_workflow.py @@ -9,13 +9,14 @@ from litellm import AuthenticationError from core.base import ( - DocumentExtraction, + DocumentChunk, IngestionStatus, + KGEnrichmentStatus, OrchestrationProvider, generate_extraction_id, increment_version, ) -from core.base.abstractions import DocumentInfo, R2RException +from core.base.abstractions import DocumentResponse, R2RException from core.utils import ( generate_default_user_collection_id, update_settings_from_dict, @@ -88,21 +89,6 @@ async def parse(self, context: Context) -> dict: async for extraction in extractions_generator: extractions.append(extraction) - # serializable_extractions = [ - # extraction.to_dict() for extraction in extractions - # ] - - # return { - # "status": "Successfully extracted data", - # "extractions": serializable_extractions, - # "document_info": document_info.to_dict(), - # } - - # @orchestration_provider.step(parents=["parse"], timeout="60m") - # async def embed(self, context: Context) -> dict: - # document_info_dict = context.step_output("parse")["document_info"] - # document_info = DocumentInfo(**document_info_dict) - await service.update_document_status( document_info, status=IngestionStatus.AUGMENTING ) @@ -140,16 +126,6 @@ async def parse(self, context: Context) -> dict: async for _ in storage_generator: pass - # return { - # "document_info": document_info.to_dict(), - # } - - # @orchestration_provider.step(parents=["embed"], timeout="60m") - # async def finalize(self, context: Context) -> dict: - # document_info_dict = context.step_output("embed")["document_info"] - # print("Calling finalize for document_info_dict = ", document_info_dict) - # document_info = DocumentInfo(**document_info_dict) - is_update = context.workflow_input()["request"].get( "is_update" ) @@ -169,7 +145,7 @@ async def parse(self, context: Context) -> 
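The updated `R2RConfig.load` above rejects passing both a name and a path and otherwise resolves in a fixed order: env path, explicit path, env name, explicit name, then "default". A minimal sketch of that precedence, with a placeholder registry instead of the real TOML loading:

```python
import os

CONFIG_OPTIONS = {"default": "r2r.toml", "full": "full.toml"}  # placeholder registry


def resolve_config(config_name: str | None = None, config_path: str | None = None) -> str:
    if config_path and config_name:
        raise ValueError(
            f"Cannot specify both config_path and config_name. Got: {config_path}, {config_name}"
        )
    # An explicit path (or R2R_CONFIG_PATH) wins over any named preset.
    if path := os.getenv("R2R_CONFIG_PATH") or config_path:
        return path
    name = os.getenv("R2R_CONFIG_NAME") or config_name or "default"
    if name not in CONFIG_OPTIONS:
        raise ValueError(f"Invalid config name: {name}")
    return CONFIG_OPTIONS[name]


print(resolve_config())        # -> "r2r.toml" unless env overrides are set
print(resolve_config("full"))  # -> "full.toml"
```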
dict: if not collection_ids: # TODO: Move logic onto the `management service` collection_id = generate_default_user_collection_id( - document_info.user_id + document_info.owner_id ) await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, @@ -179,14 +155,34 @@ async def parse(self, context: Context) -> dict: document_id=document_info.id, collection_id=collection_id, ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.OUTDATED, + ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_cluster_status", # NOTE - we should actually check that cluster has been made first, if not it should be PENDING still + status=KGEnrichmentStatus.OUTDATED, + ) else: for collection_id in collection_ids: try: - await service.providers.database.create_collection( - name=document_info.title, + name = document_info.title or "N/A" + description = "" + result = await self.providers.database.create_collection( + owner_id=document_info.owner_id, + name=name, + description=description, collection_id=collection_id, - description="", ) + await self.providers.database.graph_handler.create( + collection_id=collection_id, + name=name, + description=description, + graph_id=collection_id, + ) + except Exception as e: logger.warning( f"Warning, could not create collection with error: {str(e)}" @@ -200,7 +196,16 @@ async def parse(self, context: Context) -> dict: document_id=document_info.id, collection_id=collection_id, ) - + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.OUTDATED, + ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_cluster_status", # NOTE - we should actually check that cluster has been made first, if not it should be PENDING still + status=KGEnrichmentStatus.OUTDATED, + ) # get server chunk enrichment settings and override parts of it if provided in the ingestion config server_chunk_enrichment_settings = getattr( service.providers.ingestion.config, @@ -223,7 +228,9 @@ async def parse(self, context: Context) -> dict: # we don't update the document_info when we assign document_to_collection_relational and document_to_collection_vector # hack: get document_info again from DB document_info = ( - await self.ingestion_service.providers.database.get_documents_overview( + await self.ingestion_service.providers.database.get_documents_overview( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + offset=0, + limit=100, filter_user_ids=[document_info.user_id], filter_document_ids=[document_info.id], ) @@ -273,8 +280,10 @@ async def on_failure(self, context: Context) -> None: try: documents_overview = ( - await self.ingestion_service.providers.database.get_documents_overview( - filter_document_ids=[document_id] + await self.ingestion_service.providers.database.get_documents_overview( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. 
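Both ingestion paths above now flag a collection's graph as stale whenever a new document lands in it, by setting `graph_sync_status` and `graph_cluster_status` to OUTDATED. A small sketch of that bookkeeping against a hypothetical in-memory status store; the real calls go through the database provider's `set_workflow_status`.

```python
from enum import Enum
from uuid import UUID, uuid4


class KGEnrichmentStatus(str, Enum):
    PENDING = "pending"
    OUTDATED = "outdated"
    SUCCESS = "success"
    FAILED = "failed"


# Hypothetical in-memory stand-in for the workflow-status table.
status_store: dict[tuple[UUID, str], KGEnrichmentStatus] = {}


def mark_graph_outdated(collection_id: UUID) -> None:
    """Record that the collection's graph no longer reflects its documents."""
    for status_type in ("graph_sync_status", "graph_cluster_status"):
        status_store[(collection_id, status_type)] = KGEnrichmentStatus.OUTDATED


collection_id = uuid4()
mark_graph_outdated(collection_id)
print(status_store[(collection_id, "graph_cluster_status")])  # KGEnrichmentStatus.OUTDATED
```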
+ offset=0, + limit=100, + filter_document_ids=[document_id], ) )["results"] @@ -332,7 +341,9 @@ async def update_files(self, context: Context) -> None: ) documents_overview = ( - await self.ingestion_service.providers.database.get_documents_overview( + await self.ingestion_service.providers.database.get_documents_overview( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + offset=0, + limit=100, filter_document_ids=document_ids, filter_user_ids=None if user.is_superuser else [user.id], ) @@ -424,11 +435,11 @@ async def ingest(self, context: Context) -> dict: document_id = document_info.id extractions = [ - DocumentExtraction( + DocumentChunk( id=generate_extraction_id(document_id, i), document_id=document_id, collection_ids=[], - user_id=document_info.user_id, + owner_id=document_info.owner_id, data=chunk.text, metadata=parsed_data["metadata"], ).to_dict() @@ -443,7 +454,7 @@ async def ingest(self, context: Context) -> dict: @orchestration_provider.step(parents=["ingest"], timeout="60m") async def embed(self, context: Context) -> dict: document_info_dict = context.step_output("ingest")["document_info"] - document_info = DocumentInfo(**document_info_dict) + document_info = DocumentResponse(**document_info_dict) extractions = context.step_output("ingest")["extractions"] @@ -473,7 +484,7 @@ async def embed(self, context: Context) -> dict: @orchestration_provider.step(parents=["embed"], timeout="60m") async def finalize(self, context: Context) -> dict: document_info_dict = context.step_output("embed")["document_info"] - document_info = DocumentInfo(**document_info_dict) + document_info = DocumentResponse(**document_info_dict) await self.ingestion_service.finalize_ingestion( document_info, is_update=False @@ -491,7 +502,7 @@ async def finalize(self, context: Context) -> dict: if not collection_ids: # TODO: Move logic onto the `management service` collection_id = generate_default_user_collection_id( - document_info.user_id + document_info.owner_id ) await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, @@ -501,14 +512,34 @@ async def finalize(self, context: Context) -> dict: document_id=document_info.id, collection_id=collection_id, ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.OUTDATED, + ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_cluster_status", # NOTE - we should actually check that cluster has been made first, if not it should be PENDING still + status=KGEnrichmentStatus.OUTDATED, + ) else: for collection_id in collection_ids: try: + name = document_info.title or "N/A" + description = "" await service.providers.database.create_collection( - name=document_info.title or "N/A", + owner_id=document_info.owner_id, + name=name, + description=description, collection_id=collection_id, - description="", ) + await self.providers.database.graph_handler.create( + collection_id=collection_id, + name=name, + description=description, + graph_id=collection_id, + ) + except Exception as e: logger.warning( f"Warning, could not create collection with error: {str(e)}" @@ -522,6 +553,16 @@ async def finalize(self, context: Context) -> dict: document_id=document_info.id, collection_id=collection_id, ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.OUTDATED, + ) + 
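The ingest step above turns parsed chunks into `DocumentChunk` records, deriving each chunk id from the document id and its position and attributing the chunk to an `owner_id`. The sketch below mirrors that shape with trimmed-down types; `generate_extraction_id` is approximated here with `uuid5`, which may differ from the project's actual helper.

```python
import uuid
from dataclasses import asdict, dataclass, field


@dataclass
class DocumentChunk:
    # Trimmed-down stand-in for the real DocumentChunk model.
    id: uuid.UUID
    document_id: uuid.UUID
    owner_id: uuid.UUID
    data: str
    collection_ids: list[uuid.UUID] = field(default_factory=list)
    metadata: dict = field(default_factory=dict)


def generate_extraction_id(document_id: uuid.UUID, index: int) -> uuid.UUID:
    # Assumption: a deterministic id per (document, position); the real helper may differ.
    return uuid.uuid5(document_id, str(index))


def build_chunks(document_id, owner_id, texts, metadata):
    return [
        asdict(
            DocumentChunk(
                id=generate_extraction_id(document_id, i),
                document_id=document_id,
                owner_id=owner_id,
                data=text,
                metadata=metadata,
            )
        )
        for i, text in enumerate(texts)
    ]


doc_id, owner_id = uuid.uuid4(), uuid.uuid4()
print(len(build_chunks(doc_id, owner_id, ["first chunk", "second chunk"], {"source": "demo"})))
```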
await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_cluster_status", + status=KGEnrichmentStatus.OUTDATED, # NOTE - we should actually check that cluster has been made first, if not it should be PENDING still + ) except Exception as e: logger.error( f"Error during assigning document to collection: {str(e)}" @@ -545,8 +586,10 @@ async def on_failure(self, context: Context) -> None: try: documents_overview = ( - await self.ingestion_service.providers.database.get_documents_overview( - filter_document_ids=[document_id] + await self.ingestion_service.providers.database.get_documents_overview( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + offset=0, + limit=100, + filter_document_ids=[document_id], ) )["results"] @@ -593,14 +636,14 @@ async def update_chunk(self, context: Context) -> dict: else parsed_data["document_id"] ) extraction_uuid = ( - UUID(parsed_data["extraction_id"]) - if isinstance(parsed_data["extraction_id"], str) - else parsed_data["extraction_id"] + UUID(parsed_data["id"]) + if isinstance(parsed_data["id"], str) + else parsed_data["id"] ) await self.ingestion_service.update_chunk_ingress( document_id=document_uuid, - extraction_id=extraction_uuid, + chunk_id=extraction_uuid, text=parsed_data.get("text"), user=parsed_data["user"], metadata=parsed_data.get("metadata"), diff --git a/py/core/main/orchestration/hatchet/kg_workflow.py b/py/core/main/orchestration/hatchet/kg_workflow.py index 11d9c05a3..4687e1d45 100644 --- a/py/core/main/orchestration/hatchet/kg_workflow.py +++ b/py/core/main/orchestration/hatchet/kg_workflow.py @@ -8,7 +8,7 @@ from hatchet_sdk import ConcurrencyLimitStrategy, Context from core import GenerationConfig -from core.base import OrchestrationProvider +from core.base import OrchestrationProvider, R2RException from core.base.abstractions import KGEnrichmentStatus, KGExtractionStatus from ...services import KgService @@ -27,18 +27,27 @@ def hatchet_kg_factory( def get_input_data_dict(input_data): for key, value in input_data.items(): + if value is None: + continue + + if key == "document_id": + input_data[key] = uuid.UUID(value) + if key == "collection_id": input_data[key] = uuid.UUID(value) - if key == "kg_creation_settings": + if key == "graph_id": + input_data[key] = uuid.UUID(value) + + if key == "graph_creation_settings": input_data[key] = json.loads(value) input_data[key]["generation_config"] = GenerationConfig( **input_data[key]["generation_config"] ) - if key == "kg_enrichment_settings": + if key == "graph_enrichment_settings": input_data[key] = json.loads(value) - if key == "kg_entity_deduplication_settings": + if key == "graph_entity_deduplication_settings": input_data[key] = json.loads(value) if isinstance(input_data[key]["generation_config"], str): @@ -70,9 +79,7 @@ def __init__(self, kg_service: KgService): def concurrency(self, context: Context) -> str: # TODO: Possible bug in hatchet, the job can't find context.workflow_input() when rerun try: - return str( - context.workflow_input()["request"]["collection_id"] - ) + return str(context.workflow_input()["request"]["graph_id"]) except Exception as e: return str(uuid.uuid4()) @@ -86,20 +93,31 @@ async def kg_extract(self, context: Context) -> dict: context.workflow_input()["request"] ) - # context.log(f"Running KG Extraction for collection ID: {input_data['collection_id']}") + # context.log(f"Running KG Extraction for collection ID: {input_data['graph_id']}") document_id = 
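The `get_input_data_dict` helper above normalizes the raw workflow payload: id fields become `UUID` objects and the renamed `graph_*_settings` fields are decoded from JSON, with the nested `generation_config` rebuilt into a typed object. A self-contained sketch of the same normalization, using a placeholder `GenerationConfig`:

```python
import json
import uuid
from dataclasses import dataclass


@dataclass
class GenerationConfig:
    # Placeholder for the real GenerationConfig model.
    model: str = "gpt-4o-mini"
    temperature: float = 0.0


def normalize_input(input_data: dict) -> dict:
    for key, value in list(input_data.items()):
        if value is None:
            continue
        if key in ("document_id", "collection_id", "graph_id"):
            input_data[key] = uuid.UUID(value)
        elif key == "graph_creation_settings":
            settings = json.loads(value)
            settings["generation_config"] = GenerationConfig(**settings["generation_config"])
            input_data[key] = settings
    return input_data


payload = {
    "document_id": str(uuid.uuid4()),
    "graph_creation_settings": json.dumps(
        {"generation_config": {"model": "gpt-4o-mini", "temperature": 0.1}}
    ),
}
print(normalize_input(payload)["graph_creation_settings"]["generation_config"])
```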
input_data["document_id"] - await self.kg_service.kg_triples_extraction( - document_id=uuid.UUID(document_id), - **input_data["kg_creation_settings"], - ) + # await self.kg_service.kg_relationships_extraction( + # document_id=document_id, + # **input_data["graph_creation_settings"], + # ) + extractions = [] + async for extraction in service.kg_extraction( + document_id=document_id, + **input_data["graph_creation_settings"], + ): + print( + "found extraction w/ entities = = ", + len(extraction.entities), + ) + extractions.append(extraction) + await service.store_kg_extractions(extractions) logger.info( - f"Successfully ran kg triples extraction for document {document_id}" + f"Successfully ran kg relationships extraction for document {document_id}" ) return { - "result": f"successfully ran kg triples extraction for document {document_id} in {time.time() - start_time:.2f} seconds", + "result": f"successfully ran kg relationships extraction for document {document_id} in {time.time() - start_time:.2f} seconds", } @orchestration_provider.step( @@ -113,8 +131,8 @@ async def kg_entity_description(self, context: Context) -> dict: document_id = input_data["document_id"] await self.kg_service.kg_entity_description( - document_id=uuid.UUID(document_id), - **input_data["kg_creation_settings"], + document_id=document_id, + **input_data["graph_creation_settings"], ) logger.info( @@ -139,7 +157,7 @@ async def on_failure(self, context: Context) -> None: try: await self.kg_service.providers.database.set_workflow_status( id=uuid.UUID(document_id), - status_type="kg_extraction_status", + status_type="extraction_status", status=KGExtractionStatus.FAILED, ) context.log( @@ -151,118 +169,199 @@ async def on_failure(self, context: Context) -> None: f"Failed to update document status for {document_id}: {e}" ) - @orchestration_provider.workflow(name="create-graph", timeout="600m") + @orchestration_provider.workflow(name="extract-triples", timeout="600m") class CreateGraphWorkflow: def __init__(self, kg_service: KgService): self.kg_service = kg_service @orchestration_provider.step(retries=1) - async def get_document_ids_for_create_graph( - self, context: Context - ) -> dict: - + async def kg_extraction(self, context: Context) -> dict: input_data = get_input_data_dict( context.workflow_input()["request"] ) - collection_id = input_data["collection_id"] - - return_val = { - "document_ids": [ - str(doc_id) - for doc_id in await self.kg_service.get_document_ids_for_create_graph( - collection_id=collection_id, - **input_data["kg_creation_settings"], - ) - ] - } + document_id = input_data["document_id"] - if len(return_val["document_ids"]) == 0: - raise ValueError( - "No documents to process, either all documents to create the graph were already created or in progress, or the collection is empty." 
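The `kg_extract` step above switches from a single service call to consuming an async generator of extractions and persisting them in one batch. A minimal asyncio sketch of that consume-then-store pattern, with dummy extraction objects standing in for the KG service:

```python
import asyncio
from dataclasses import dataclass


@dataclass
class Extraction:
    # Simplified stand-in for a KG extraction result.
    entities: list[str]
    relationships: list[tuple[str, str, str]]


async def kg_extraction(document_id: str):
    # Dummy generator standing in for service.kg_extraction(...).
    for i in range(3):
        yield Extraction(entities=[f"entity-{i}"], relationships=[])


async def store_kg_extractions(extractions: list[Extraction]) -> None:
    print(f"storing {len(extractions)} extractions")


async def run(document_id: str) -> None:
    extractions = []
    async for extraction in kg_extraction(document_id):
        # Collect every extraction before a single bulk write, as in the step above.
        extractions.append(extraction)
    await store_kg_extractions(extractions)


asyncio.run(run("doc-1"))
```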
+ # Extract relationships and store them + extractions = [] + async for extraction in self.kg_service.kg_extraction( + document_id=document_id, + **input_data["graph_creation_settings"], + ): + logger.info( + f"Found extraction with {len(extraction.entities)} entities" ) + extractions.append(extraction) - return return_val + await self.kg_service.store_kg_extractions(extractions) - @orchestration_provider.step( - retries=1, parents=["get_document_ids_for_create_graph"] - ) - async def kg_extraction_ingress(self, context: Context) -> dict: - - document_ids = [ - uuid.UUID(doc_id) - for doc_id in context.step_output( - "get_document_ids_for_create_graph" - )["document_ids"] - ] - results = [] - for cnt, document_id in enumerate(document_ids): - logger.info( - f"Running Graph Creation Workflow for document ID: {document_id}" - ) - results.append( - ( - await context.aio.spawn_workflow( - "kg-extract", - { - "request": { - "document_id": str(document_id), - "kg_creation_settings": context.workflow_input()[ - "request" - ][ - "kg_creation_settings" - ], - "collection_id": context.workflow_input()[ - "request" - ]["collection_id"], - } - }, - key=f"kg-extract-{cnt}/{len(document_ids)}", - ) - ).result() - ) + logger.info( + f"Successfully ran kg relationships extraction for document {document_id}" + ) - if not document_ids: - logger.info( - "No documents to process, either all graphs were created or in progress, or no documents were provided. Skipping graph creation." - ) - return {"result": "No documents to process"} + return { + "result": f"successfully ran kg relationships extraction for document {document_id}", + "document_id": str(document_id), + } + + @orchestration_provider.step(retries=1, parents=["kg_extraction"]) + async def kg_entity_description(self, context: Context) -> dict: + input_data = get_input_data_dict( + context.workflow_input()["request"] + ) + document_id = input_data["document_id"] + + # Describe the entities in the graph + await self.kg_service.kg_entity_description( + document_id=document_id, + **input_data["graph_creation_settings"], + ) + + logger.info( + f"Successfully ran kg entity description for document {document_id}" + ) - logger.info(f"Ran {len(results)} workflows for graph creation") - results = await asyncio.gather(*results) return { - "result": f"successfully ran graph creation workflows for {len(results)} documents" + "result": f"successfully ran kg entity description for document {document_id}" } - @orchestration_provider.step( - retries=1, parents=["kg_extraction_ingress"] - ) - async def update_enrichment_status(self, context: Context) -> dict: + @orchestration_provider.failure() + async def on_failure(self, context: Context) -> None: + request = context.workflow_input().get("request", {}) + document_id = request.get("document_id") - enrichment_status = ( - await self.kg_service.providers.database.get_workflow_status( - id=uuid.UUID( - context.workflow_input()["request"]["collection_id"] - ), - status_type="kg_enrichment_status", + if not document_id: + logger.info( + "No document id was found in workflow input to mark a failure." 
) - ) + return - if enrichment_status == KGEnrichmentStatus.SUCCESS: + try: await self.kg_service.providers.database.set_workflow_status( - id=uuid.UUID( - context.workflow_input()["request"]["collection_id"] - ), - status_type="kg_enrichment_status", - status=KGEnrichmentStatus.OUTDATED, + id=uuid.UUID(document_id), + status_type="extraction_status", + status=KGExtractionStatus.FAILED, ) - logger.info( - f"Updated enrichment status for collection {context.workflow_input()['request']['collection_id']} to OUTDATED because an older enrichment was already successful" + f"Updated KG extraction status for {document_id} to FAILED" + ) + except Exception as e: + logger.error( + f"Failed to update document status for {document_id}: {e}" ) - return { - "result": f"updated enrichment status for collection {context.workflow_input()['request']['collection_id']} to OUTDATED because an older enrichment was already successful" - } + # class CreateGraphWorkflow: + # def __init__(self, kg_service: KgService): + # self.kg_service = kg_service + + # @orchestration_provider.step(retries=1) + # async def get_document_ids_for_create_graph( + # self, context: Context + # ) -> dict: + + # input_data = get_input_data_dict( + # context.workflow_input()["request"] + # ) + + # if "collection_id" in input_data: + + # collection_id = input_data["collection_id"] + + # return_val = { + # "document_ids": [ + # str(doc_id) + # for doc_id in await self.kg_service.get_document_ids_for_create_graph( + # collection_id=collection_id, + # **input_data["graph_creation_settings"], + # ) + # ] + # } + + # if len(return_val["document_ids"]) == 0: + # raise ValueError( + # "No documents to process, either all documents to create the graph were already created or in progress, or the collection is empty." + # ) + # else: + # return_val = {"document_ids": [str(input_data["document_id"])]} + + # return return_val + + # @orchestration_provider.step( + # retries=1, parents=["get_document_ids_for_create_graph"] + # ) + # async def kg_extraction_ingress(self, context: Context) -> dict: + + # document_ids = [ + # uuid.UUID(doc_id) + # for doc_id in context.step_output( + # "get_document_ids_for_create_graph" + # )["document_ids"] + # ] + # results = [] + # for cnt, document_id in enumerate(document_ids): + # logger.info( + # f"Running Graph Creation Workflow for document ID: {document_id}" + # ) + # results.append( + # ( + # await context.aio.spawn_workflow( + # "kg-extract", + # { + # "request": { + # "document_id": str(document_id), + # "graph_creation_settings": context.workflow_input()[ + # "request" + # ][ + # "graph_creation_settings" + # ], + # } + # }, + # key=f"kg-extract-{cnt}/{len(document_ids)}", + # ) + # ).result() + # ) + + # if not document_ids: + # logger.info( + # "No documents to process, either all graphs were created or in progress, or no documents were provided. Skipping graph creation." 
+ # ) + # return {"result": "No documents to process"} + + # logger.info(f"Ran {len(results)} workflows for graph creation") + # results = await asyncio.gather(*results) + # return { + # "result": f"successfully ran graph creation workflows for {len(results)} documents" + # } + + # @orchestration_provider.step( + # retries=1, parents=["kg_extraction_ingress"] + # ) + # async def update_enrichment_status(self, context: Context) -> dict: + + # enrichment_status = ( + # await self.kg_service.providers.database.get_workflow_status( + # id=uuid.UUID( + # context.workflow_input()["request"]["collection_id"] + # ), + # status_type="graph_cluster_status", + # ) + # ) + + # if enrichment_status == KGEnrichmentStatus.SUCCESS: + # await self.kg_service.providers.database.set_workflow_status( + # id=uuid.UUID( + # context.workflow_input()["request"]["collection_id"] + # ), + # status_type="graph_cluster_status", + # status=KGEnrichmentStatus.OUTDATED, + # ) + + # logger.info( + # f"Updated enrichment status for collection {context.workflow_input()['request']['collection_id']} to OUTDATED because an older enrichment was already successful" + # ) + + # return { + # "result": f"updated enrichment status for collection {context.workflow_input()['request']['collection_id']} to OUTDATED because an older enrichment was already successful" + # } @orchestration_provider.workflow( name="entity-deduplication", timeout="360m" @@ -280,26 +379,26 @@ async def kg_entity_deduplication_setup( context.workflow_input()["request"] ) - collection_id = input_data["collection_id"] + graph_id = input_data["graph_id"] logger.info( - f"Running KG Entity Deduplication for collection {collection_id}" + f"Running KG Entity Deduplication for collection {graph_id}" ) logger.info(f"Input data: {input_data}") logger.info( - f"KG Entity Deduplication Settings: {input_data['kg_entity_deduplication_settings']}" + f"KG Entity Deduplication Settings: {input_data['graph_entity_deduplication_settings']}" ) number_of_distinct_entities = ( await self.kg_service.kg_entity_deduplication( - collection_id=collection_id, - **input_data["kg_entity_deduplication_settings"], + graph_id=graph_id, + **input_data["graph_entity_deduplication_settings"], ) )[0]["num_entities"] - input_data["kg_entity_deduplication_settings"][ + input_data["graph_entity_deduplication_settings"][ "generation_config" - ] = input_data["kg_entity_deduplication_settings"][ + ] = input_data["graph_entity_deduplication_settings"][ "generation_config" ].model_dump_json() @@ -313,12 +412,12 @@ async def kg_entity_deduplication_setup( "kg-entity-deduplication-summary", { "request": { - "collection_id": collection_id, + "graph_id": graph_id, "offset": offset, "limit": 100, - "kg_entity_deduplication_settings": json.dumps( + "graph_entity_deduplication_settings": json.dumps( input_data[ - "kg_entity_deduplication_settings" + "graph_entity_deduplication_settings" ] ), } @@ -329,7 +428,7 @@ async def kg_entity_deduplication_setup( await asyncio.gather(*workflows) return { - "result": f"successfully queued kg entity deduplication for collection {collection_id} with {number_of_distinct_entities} distinct entities" + "result": f"successfully queued kg entity deduplication for collection {graph_id} with {number_of_distinct_entities} distinct entities" } @orchestration_provider.workflow( @@ -351,73 +450,107 @@ async def kg_entity_deduplication_summary( input_data = get_input_data_dict( context.workflow_input()["request"] ) - collection_id = input_data["collection_id"] + graph_id = 
input_data["graph_id"] await self.kg_service.kg_entity_deduplication_summary( - collection_id=collection_id, + graph_id=graph_id, offset=input_data["offset"], limit=input_data["limit"], - **input_data["kg_entity_deduplication_settings"], + **input_data["graph_entity_deduplication_settings"], ) return { - "result": f"successfully queued kg entity deduplication summary for collection {collection_id}" + "result": f"successfully queued kg entity deduplication summary for collection {graph_id}" } - @orchestration_provider.workflow(name="enrich-graph", timeout="360m") + @orchestration_provider.workflow(name="build-communities", timeout="360m") class EnrichGraphWorkflow: def __init__(self, kg_service: KgService): self.kg_service = kg_service @orchestration_provider.step(retries=1, parents=[], timeout="360m") async def kg_clustering(self, context: Context) -> dict: - - start_time = time.time() - logger.info("Running KG Clustering") input_data = get_input_data_dict( context.workflow_input()["request"] ) - collection_id = input_data["collection_id"] - logger.info( - f"Running KG Clustering for collection {collection_id} with settings {input_data['kg_enrichment_settings']}" - ) + # Get the collection_id and graph_id + collection_id = input_data.get("collection_id", None) + graph_id = input_data.get("graph_id", None) - kg_clustering_results = await self.kg_service.kg_clustering( - collection_id=collection_id, - **input_data["kg_enrichment_settings"], + # Check current workflow status + workflow_status = ( + await self.kg_service.providers.database.get_workflow_status( + id=collection_id, + status_type="graph_cluster_status", + ) ) - logger.info( - f"Successfully ran kg clustering for collection {collection_id}: {json.dumps(kg_clustering_results)}" - ) + if workflow_status == KGEnrichmentStatus.SUCCESS: + raise R2RException( + "Communities have already been built for this collection. 
To build communities again, first reset the graph.", + 400, + ) - return { - "result": f"successfully ran kg clustering for collection {collection_id}", - "kg_clustering": kg_clustering_results, - } + # Run clustering + try: + kg_clustering_results = await self.kg_service.kg_clustering( + collection_id=collection_id, + graph_id=graph_id, + **input_data["graph_enrichment_settings"], + ) + + num_communities = kg_clustering_results[0]["num_communities"] + + if num_communities == 0: + raise R2RException("No communities found", 400) + + logger.info( + f"Successfully ran kg clustering: {json.dumps(kg_clustering_results)}" + ) + + return { + "result": f"successfully ran kg clustering", + "kg_clustering": kg_clustering_results, + } + except Exception as e: + await self.kg_service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_cluster_status", + status=KGEnrichmentStatus.FAILED, + ) + raise e @orchestration_provider.step(retries=1, parents=["kg_clustering"]) async def kg_community_summary(self, context: Context) -> dict: - input_data = get_input_data_dict( context.workflow_input()["request"] ) - collection_id = input_data["collection_id"] + collection_id = input_data.get("collection_id", None) + graph_id = input_data.get("graph_id", None) + + # Get number of communities from previous step num_communities = context.step_output("kg_clustering")[ "kg_clustering" ][0]["num_communities"] - parallel_communities = min(100, num_communities) - total_workflows = math.ceil(num_communities / parallel_communities) + + # Calculate batching + parallel_communities = min(100, num_communities[0]) + total_workflows = math.ceil( + num_communities[0] / parallel_communities + ) workflows = [] logger.info( f"Running KG Community Summary for {num_communities} communities, spawning {total_workflows} workflows" ) + # Spawn summary workflows for i in range(total_workflows): offset = i * parallel_communities + limit = min(parallel_communities, num_communities[0] - offset) + workflows.append( ( await context.aio.spawn_workflow( @@ -425,12 +558,16 @@ async def kg_community_summary(self, context: Context) -> dict: { "request": { "offset": offset, - "limit": min( - parallel_communities, - num_communities - offset, + "limit": limit, + "graph_id": ( + str(graph_id) if graph_id else None ), - "collection_id": str(collection_id), - **input_data["kg_enrichment_settings"], + "collection_id": ( + str(collection_id) + if collection_id + else None + ), + **input_data["graph_enrichment_settings"], } }, key=f"{i}/{total_workflows}_community_summary", @@ -439,43 +576,131 @@ async def kg_community_summary(self, context: Context) -> dict: ) results = await asyncio.gather(*workflows) + logger.info( + f"Completed {len(results)} community summary workflows" + ) - logger.info(f"Ran {len(results)} workflows for community summary") - - # set status to success - # for all documents in the collection, set kg_creation_status to ENRICHED + # Update statuses document_ids = await self.kg_service.providers.database.get_document_ids_by_status( - status_type="kg_extraction_status", + status_type="extraction_status", status=KGExtractionStatus.SUCCESS, collection_id=collection_id, ) await self.kg_service.providers.database.set_workflow_status( id=document_ids, - status_type="kg_extraction_status", + status_type="extraction_status", status=KGExtractionStatus.ENRICHED, ) await self.kg_service.providers.database.set_workflow_status( id=collection_id, - status_type="kg_enrichment_status", + status_type="graph_cluster_status", 
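The batching in `kg_community_summary` above caps each summary workflow at 100 communities and derives the workflow count with a ceiling division. A small worked sketch of that arithmetic; the `[0]` indexing on `num_communities` in the new code suggests the clustering result arrives as a list, but a plain integer is used here for clarity.

```python
import math


def plan_summary_batches(num_communities: int, cap: int = 100) -> list[tuple[int, int]]:
    """Return (offset, limit) pairs covering every community, at most `cap` per workflow."""
    parallel_communities = min(cap, num_communities)
    total_workflows = math.ceil(num_communities / parallel_communities)
    batches = []
    for i in range(total_workflows):
        offset = i * parallel_communities
        limit = min(parallel_communities, num_communities - offset)
        batches.append((offset, limit))
    return batches


# 250 communities -> three workflows: (0, 100), (100, 100), (200, 50)
print(plan_summary_batches(250))
```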
status=KGEnrichmentStatus.SUCCESS, ) return { - "result": f"Successfully completed enrichment for collection {collection_id} in {len(results)} workflows." + "result": f"Successfully completed enrichment with {len(results)} summary workflows" } @orchestration_provider.failure() async def on_failure(self, context: Context) -> None: - collection_id = context.workflow_input()["request"][ - "collection_id" - ] - await self.kg_service.providers.database.set_workflow_status( - id=collection_id, - status_type="kg_enrichment_status", - status=KGEnrichmentStatus.FAILED, + collection_id = context.workflow_input()["request"].get( + "collection_id", None ) + if collection_id: + await self.kg_service.providers.database.set_workflow_status( + id=uuid.UUID(collection_id), + status_type="graph_cluster_status", + status=KGEnrichmentStatus.FAILED, + ) + + # @orchestration_provider.step(retries=1, parents=["kg_clustering"]) + # async def kg_community_summary(self, context: Context) -> dict: + + # input_data = get_input_data_dict( + # context.workflow_input()["request"] + # ) + # graph_id = input_data.get("graph_id", None) + # collection_id = input_data.get("collection_id", None) + # num_communities = context.step_output("kg_clustering")[ + # "kg_clustering" + # ][0]["num_communities"] + # parallel_communities = min(100, num_communities) + # total_workflows = math.ceil(num_communities / parallel_communities) + # workflows = [] + + # logger.info( + # f"Running KG Community Summary for {num_communities} communities, spawning {total_workflows} workflows" + # ) + + # for i in range(total_workflows): + # offset = i * parallel_communities + # workflows.append( + # ( + # await context.aio.spawn_workflow( + # "kg-community-summary", + # { + # "request": { + # "offset": offset, + # "limit": min( + # parallel_communities, + # num_communities - offset, + # ), + # "graph_id": ( + # str(graph_id) if graph_id else None + # ), + # "collection_id": ( + # str(collection_id) + # if collection_id + # else None + # ), + # **input_data["graph_enrichment_settings"], + # } + # }, + # key=f"{i}/{total_workflows}_community_summary", + # ) + # ).result() + # ) + + # results = await asyncio.gather(*workflows) + + # logger.info(f"Ran {len(results)} workflows for community summary") + + # # set status to success + # # for all documents in the collection, set kg_creation_status to ENRICHED + # document_ids = await self.kg_service.providers.database.get_document_ids_by_status( + # status_type="extraction_status", + # status=KGExtractionStatus.SUCCESS, + # collection_id=collection_id, + # ) + + # await self.kg_service.providers.database.set_workflow_status( + # id=document_ids, + # status_type="extraction_status", + # status=KGExtractionStatus.ENRICHED, + # ) + + # await self.kg_service.providers.database.set_workflow_status( + # id=graph_id, + # status_type="graph_cluster_status", + # status=KGEnrichmentStatus.SUCCESS, + # ) + + # return { + # "result": f"Successfully completed enrichment for collection {graph_id} in {len(results)} workflows." 
+ # } + + # @orchestration_provider.failure() + # async def on_failure(self, context: Context) -> None: + # collection_id = context.workflow_input()["request"].get( + # "collection_id", None + # ) + # await self.kg_service.providers.database.set_workflow_status( + # id=uuid.UUID(collection_id), + # status_type="graph_cluster_status", + # status=KGEnrichmentStatus.FAILED, + # ) @orchestration_provider.workflow( name="kg-community-summary", timeout="360m" @@ -502,6 +727,8 @@ async def kg_community_summary(self, context: Context) -> dict: start_time = time.time() + logger.info + input_data = get_input_data_dict( context.workflow_input()["request"] ) @@ -518,8 +745,8 @@ async def kg_community_summary(self, context: Context) -> dict: return { "kg-extract": KGExtractDescribeEmbedWorkflow(service), - "create-graph": CreateGraphWorkflow(service), - "enrich-graph": EnrichGraphWorkflow(service), + "extract-triples": CreateGraphWorkflow(service), + "build-communities": EnrichGraphWorkflow(service), "kg-community-summary": KGCommunitySummaryWorkflow(service), "kg-entity-deduplication": EntityDeduplicationWorkflow(service), "kg-entity-deduplication-summary": EntityDeduplicationSummaryWorkflow( diff --git a/py/core/main/orchestration/simple/ingestion_workflow.py b/py/core/main/orchestration/simple/ingestion_workflow.py index d91e87fe2..5eff20b84 100644 --- a/py/core/main/orchestration/simple/ingestion_workflow.py +++ b/py/core/main/orchestration/simple/ingestion_workflow.py @@ -5,7 +5,12 @@ from fastapi import HTTPException from litellm import AuthenticationError -from core.base import DocumentExtraction, R2RException, increment_version +from core.base import ( + DocumentChunk, + KGEnrichmentStatus, + R2RException, + increment_version, +) from core.utils import ( generate_default_user_collection_id, generate_extraction_id, @@ -79,7 +84,7 @@ async def ingest_files(input_data): if not collection_ids: # TODO: Move logic onto the `management service` collection_id = generate_default_user_collection_id( - document_info.user_id + document_info.owner_id ) await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, @@ -89,13 +94,35 @@ async def ingest_files(input_data): document_id=document_info.id, collection_id=collection_id, ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.OUTDATED, + ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_cluster_status", + status=KGEnrichmentStatus.OUTDATED, # NOTE - we should actually check that cluster has been made first, if not it should be PENDING still + ) else: + for collection_id in collection_ids: try: - await service.providers.database.create_collection( - name=document_info.title, + # FIXME: Right now we just throw a warning if the collection already exists, but we should probably handle this more gracefully + name = "My Collection" + description = f"A collection started during {document_info.title} ingestion" + + result = await service.providers.database.create_collection( + owner_id=document_info.owner_id, + name=name, + description=description, collection_id=collection_id, - description="", + ) + await service.providers.database.graph_handler.create( + collection_id=collection_id, + name=name, + description=description, + graph_id=collection_id, ) except Exception as e: logger.warning( @@ -106,10 +133,22 @@ async def ingest_files(input_data): document_id=document_info.id, 
collection_id=collection_id, ) + await service.providers.database.assign_document_to_collection_vector( document_id=document_info.id, collection_id=collection_id, ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.OUTDATED, + ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_cluster_status", + status=KGEnrichmentStatus.OUTDATED, # NOTE - we should actually check that cluster has been made first, if not it should be PENDING still + ) + except Exception as e: logger.error( f"Error during assigning document to collection: {str(e)}" @@ -134,7 +173,6 @@ async def ingest_files(input_data): ) async def update_files(input_data): - from core.base import IngestionStatus from core.main import IngestionServiceAdapter parsed_data = IngestionServiceAdapter.parse_update_files_input( @@ -159,9 +197,11 @@ async def update_files(input_data): ) documents_overview = ( - await service.providers.database.get_documents_overview( - filter_document_ids=document_ids, + await service.providers.database.get_documents_overview( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + offset=0, + limit=100, filter_user_ids=None if user.is_superuser else [user.id], + filter_document_ids=document_ids, ) )["results"] @@ -230,11 +270,15 @@ async def ingest_chunks(input_data): document_id = document_info.id extractions = [ - DocumentExtraction( - id=generate_extraction_id(document_id, i), + DocumentChunk( + id=( + generate_extraction_id(document_id, i) + if chunk.id is None + else chunk.id + ), document_id=document_id, collection_ids=[], - user_id=document_info.user_id, + owner_id=document_info.owner_id, data=chunk.text, metadata=parsed_data["metadata"], ).model_dump() @@ -265,25 +309,48 @@ async def ingest_chunks(input_data): try: # TODO - Move logic onto management service if not collection_ids: - # TODO: Move logic onto the `management service` collection_id = generate_default_user_collection_id( - document_info.user_id + document_info.owner_id ) + await service.providers.database.assign_document_to_collection_relational( document_id=document_info.id, collection_id=collection_id, ) + await service.providers.database.assign_document_to_collection_vector( document_id=document_info.id, collection_id=collection_id, ) + + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.OUTDATED, + ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_cluster_status", + status=KGEnrichmentStatus.OUTDATED, # NOTE - we should actually check that cluster has been made first, if not it should be PENDING still + ) + else: for collection_id in collection_ids: try: - await service.providers.database.create_collection( - name=document_info.title, + + name = document_info.title or "N/A" + description = "" + result = await service.providers.database.create_collection( + owner_id=document_info.owner_id, + name=name, + description=description, + collection_id=collection_id, + ) + await service.providers.database.graph_handler.create( collection_id=collection_id, - description="", + name=name, + description=description, + graph_id=collection_id, ) except Exception as e: logger.warning( @@ -298,6 +365,17 @@ async def ingest_chunks(input_data): document_id=document_info.id, collection_id=collection_id, ) + await 
service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.OUTDATED, + ) + await service.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_cluster_status", + status=KGEnrichmentStatus.OUTDATED, # NOTE - we should actually check that cluster has been made first, if not it should be PENDING still + ) + except Exception as e: logger.error( f"Error during assigning document to collection: {str(e)}" @@ -326,14 +404,14 @@ async def update_chunk(input_data): else parsed_data["document_id"] ) extraction_uuid = ( - UUID(parsed_data["extraction_id"]) - if isinstance(parsed_data["extraction_id"], str) - else parsed_data["extraction_id"] + UUID(parsed_data["id"]) + if isinstance(parsed_data["id"], str) + else parsed_data["id"] ) await service.update_chunk_ingress( document_id=document_uuid, - extraction_id=extraction_uuid, + chunk_id=extraction_uuid, text=parsed_data.get("text"), user=parsed_data["user"], metadata=parsed_data.get("metadata"), diff --git a/py/core/main/orchestration/simple/kg_workflow.py b/py/core/main/orchestration/simple/kg_workflow.py index 60bc47735..bfc7e2188 100644 --- a/py/core/main/orchestration/simple/kg_workflow.py +++ b/py/core/main/orchestration/simple/kg_workflow.py @@ -16,61 +16,112 @@ def simple_kg_factory(service: KgService): def get_input_data_dict(input_data): for key, value in input_data.items(): + if type(value) == uuid.UUID: + continue + + if key == "document_id": + input_data[key] = uuid.UUID(value) + if key == "collection_id": input_data[key] = uuid.UUID(value) - if key == "kg_creation_settings": + # if key == "graph_id": + # input_data[key] = uuid.UUID(value) + + if key == "graph_creation_settings": input_data[key] = json.loads(value) input_data[key]["generation_config"] = GenerationConfig( **input_data[key]["generation_config"] ) - if key == "kg_enrichment_settings": + if key == "graph_enrichment_settings": input_data[key] = json.loads(value) input_data[key]["generation_config"] = GenerationConfig( **input_data[key]["generation_config"] ) return input_data - async def create_graph(input_data): + async def extract_triples(input_data): input_data = get_input_data_dict(input_data) - document_ids = await service.get_document_ids_for_create_graph( - collection_id=input_data["collection_id"], - **input_data["kg_creation_settings"], - ) + if input_data.get("document_id"): + document_ids = [input_data.get("document_id")] + else: + documents = [] + collection_id = input_data.get("collection_id") + batch_size = 100 + offset = 0 + while True: + # Fetch current batch + batch = ( + await service.providers.database.collections_handler.documents_in_collection( + collection_id=collection_id, + offset=offset, + limit=batch_size, + ) + )["results"] + + # If no documents returned, we've reached the end + if not batch: + break + + # Add current batch to results + documents.extend(batch) + + # Update offset for next batch + offset += batch_size + + # Optional: If batch is smaller than batch_size, we've reached the end + if len(batch) < batch_size: + break + + document_ids = [document.id for document in documents] logger.info( f"Creating graph for {len(document_ids)} documents with IDs: {document_ids}" ) for _, document_id in enumerate(document_ids): - # Extract triples from the document - + # Extract relationships from the document try: - await service.kg_triples_extraction( + extractions = [] + async for extraction in service.kg_extraction( document_id=document_id, - 
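The `extract_triples` path above now pages through a collection's documents instead of asking the service for a precomputed id list. The loop below is a generic sketch of that offset/limit pagination, with a stubbed fetch function in place of `documents_in_collection`:

```python
def fetch_page(offset: int, limit: int) -> list[str]:
    # Stub standing in for the database call; pretend the collection holds 250 documents.
    all_docs = [f"doc-{i}" for i in range(250)]
    return all_docs[offset : offset + limit]


def fetch_all_documents(batch_size: int = 100) -> list[str]:
    documents: list[str] = []
    offset = 0
    while True:
        batch = fetch_page(offset, batch_size)
        if not batch:  # nothing returned: we are past the end
            break
        documents.extend(batch)
        offset += batch_size
        if len(batch) < batch_size:  # short page: this was the last one
            break
    return documents


print(len(fetch_all_documents()))  # 250
```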
**input_data["kg_creation_settings"], - ) + **input_data["graph_creation_settings"], + ): + extractions.append(extraction) + await service.store_kg_extractions(extractions) + # Describe the entities in the graph await service.kg_entity_description( document_id=document_id, - **input_data["kg_creation_settings"], + **input_data["graph_creation_settings"], ) except Exception as e: logger.error( f"Error in creating graph for document {document_id}: {e}" ) + raise e async def enrich_graph(input_data): input_data = get_input_data_dict(input_data) + workflow_status = await service.providers.database.get_workflow_status( + id=input_data.get("collection_id", None), + status_type="graph_cluster_status", + ) + if workflow_status == KGEnrichmentStatus.SUCCESS: + raise R2RException( + "Communities have already been built for this collection. To build communities again, first submit a POST request to `graphs/{collection_id}/reset` to erase the previously built communities.", + 400, + ) try: num_communities = await service.kg_clustering( - collection_id=input_data["collection_id"], - **input_data["kg_enrichment_settings"], + collection_id=input_data.get("collection_id", None), + # graph_id=input_data.get("graph_id", None), + **input_data["graph_enrichment_settings"], ) num_communities = num_communities[0]["num_communities"] # TODO - Do not hardcode the number of parallel communities, @@ -79,38 +130,37 @@ async def enrich_graph(input_data): if num_communities == 0: raise R2RException("No communities found", 400) - parallel_communities = min(100, num_communities) + parallel_communities = min(100, num_communities[0]) - total_workflows = math.ceil(num_communities / parallel_communities) + total_workflows = math.ceil( + num_communities[0] / parallel_communities + ) for i in range(total_workflows): input_data_copy = input_data.copy() input_data_copy["offset"] = i * parallel_communities input_data_copy["limit"] = min( parallel_communities, - num_communities - i * parallel_communities, + num_communities[0] - i * parallel_communities, ) - # running i'th workflow out of total_workflows + logger.info( - f"Running kg community summary for {i+1}'th workflow out of total {total_workflows} workflows" + f"Running kg community summary for workflow {i+1} of {total_workflows}" ) await kg_community_summary( input_data=input_data_copy, ) await service.providers.database.set_workflow_status( - id=input_data["collection_id"], - status_type="kg_enrichment_status", + id=input_data.get("collection_id", None), + status_type="graph_cluster_status", status=KGEnrichmentStatus.SUCCESS, ) - return { - "result": "successfully ran kg community summary workflows" - } except Exception as e: await service.providers.database.set_workflow_status( - id=input_data["collection_id"], - status_type="kg_enrichment_status", + id=input_data.get("collection_id", None), + status_type="graph_cluster_status", status=KGEnrichmentStatus.FAILED, ) @@ -125,24 +175,27 @@ async def kg_community_summary(input_data): await service.kg_community_summary( offset=input_data["offset"], limit=input_data["limit"], - collection_id=input_data["collection_id"], - **input_data["kg_enrichment_settings"], + collection_id=input_data.get("collection_id", None), + # graph_id=input_data.get("graph_id", None), + **input_data["graph_enrichment_settings"], ) async def entity_deduplication_workflow(input_data): # TODO: We should determine how we want to handle the input here and syncronize it across all simple orchestration methods - if 
isinstance(input_data["kg_entity_deduplication_settings"], str): - input_data["kg_entity_deduplication_settings"] = json.loads( - input_data["kg_entity_deduplication_settings"] + if isinstance(input_data["graph_entity_deduplication_settings"], str): + input_data["graph_entity_deduplication_settings"] = json.loads( + input_data["graph_entity_deduplication_settings"] ) - collection_id = input_data["collection_id"] + collection_id = input_data.get("collection_id", None) + graph_id = input_data.get("graph_id", None) number_of_distinct_entities = ( await service.kg_entity_deduplication( collection_id=collection_id, - **input_data["kg_entity_deduplication_settings"], + graph_id=graph_id, + **input_data["graph_entity_deduplication_settings"], ) )[0]["num_entities"] @@ -150,12 +203,12 @@ async def entity_deduplication_workflow(input_data): collection_id=collection_id, offset=0, limit=number_of_distinct_entities, - **input_data["kg_entity_deduplication_settings"], + **input_data["graph_entity_deduplication_settings"], ) return { - "create-graph": create_graph, - "enrich-graph": enrich_graph, + "extract-triples": extract_triples, + "build-communities": enrich_graph, "kg-community-summary": kg_community_summary, "entity-deduplication": entity_deduplication_workflow, } diff --git a/py/core/main/services/auth_service.py b/py/core/main/services/auth_service.py index 8eb350605..5d0ef32cf 100644 --- a/py/core/main/services/auth_service.py +++ b/py/core/main/services/auth_service.py @@ -3,9 +3,10 @@ from uuid import UUID from core.base import R2RException, RunManager, Token -from core.base.api.models import UserResponse +from core.base.api.models import User from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider from core.telemetry.telemetry_decorator import telemetry_event +from core.utils import generate_default_user_collection_id from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders from ..config import R2RConfig @@ -34,7 +35,7 @@ def __init__( ) @telemetry_event("RegisterUser") - async def register(self, email: str, password: str) -> UserResponse: + async def register(self, email: str, password: str) -> User: return await self.providers.auth.register(email, password) @telemetry_event("VerifyEmail") @@ -73,7 +74,7 @@ async def login(self, email: str, password: str) -> dict[str, Token]: return await self.providers.auth.login(email, password) @telemetry_event("GetCurrentUser") - async def user(self, token: str) -> UserResponse: + async def user(self, token: str) -> User: token_data = await self.providers.auth.decode_token(token) if not token_data.email: raise R2RException( @@ -96,7 +97,7 @@ async def refresh_access_token( @telemetry_event("ChangePassword") async def change_password( - self, user: UserResponse, current_password: str, new_password: str + self, user: User, current_password: str, new_password: str ) -> dict[str, str]: if not user: raise R2RException(status_code=404, message="User not found") @@ -129,10 +130,8 @@ async def update_user( name: Optional[str] = None, bio: Optional[str] = None, profile_picture: Optional[str] = None, - ) -> UserResponse: - user: UserResponse = await self.providers.database.get_user_by_id( - user_id - ) + ) -> User: + user: User = await self.providers.database.get_user_by_id(user_id) if not user: raise R2RException(status_code=404, message="User not found") if email is not None: @@ -151,26 +150,41 @@ async def update_user( async def delete_user( self, user_id: UUID, - password: str, + password: Optional[str] = None, 
delete_vector_data: bool = False, is_superuser: bool = False, ) -> dict[str, str]: user = await self.providers.database.get_user_by_id(user_id) if not user: raise R2RException(status_code=404, message="User not found") + if not is_superuser and not password: + raise R2RException( + status_code=422, message="Password is required for deletion" + ) if not ( is_superuser or ( user.hashed_password is not None - and self.providers.auth.crypto_provider.verify_password( # type: ignore - password, user.hashed_password + and self.providers.auth.crypto_provider.verify_password( + password, user.hashed_password # type: ignore ) ) ): raise R2RException(status_code=400, message="Incorrect password") await self.providers.database.delete_user_relational(user_id) + + # Delete user's default collection + # TODO: We need to better define what happens to the user's data when they are deleted + collection_id = generate_default_user_collection_id(user_id) + await self.providers.database.delete_collection_relational( + collection_id + ) + if delete_vector_data: await self.providers.database.delete_user_vector(user_id) + await self.providers.database.delete_collection_vector( + collection_id + ) return {"message": f"User account {user_id} deleted successfully."} @@ -186,14 +200,17 @@ async def clean_expired_blacklisted_tokens( @telemetry_event("GetUserVerificationCode") async def get_user_verification_code( - self, user_id: UUID, *args, **kwargs + self, + user_id: UUID, ) -> dict: """ Get only the verification code data for a specific user. This method should be called after superuser authorization has been verified. """ verification_data = ( - await self.providers.database.get_user_validation_data(user_id) + await self.providers.database.get_user_validation_data( + user_id=user_id + ) ) return { "verification_code": verification_data["verification_data"][ @@ -206,14 +223,17 @@ async def get_user_verification_code( @telemetry_event("GetUserVerificationCode") async def get_user_reset_token( - self, user_id: UUID, *args, **kwargs + self, + user_id: UUID, ) -> dict: """ Get only the verification code data for a specific user. This method should be called after superuser authorization has been verified. 
""" verification_data = ( - await self.providers.database.get_user_validation_data(user_id) + await self.providers.database.get_user_validation_data( + user_id=user_id + ) ) return { "reset_token": verification_data["verification_data"][ diff --git a/templates/agentic_rag_chatbot/python-backend/README.md b/py/core/main/services/graph_service.py similarity index 100% rename from templates/agentic_rag_chatbot/python-backend/README.md rename to py/core/main/services/graph_service.py diff --git a/py/core/main/services/ingestion_service.py b/py/core/main/services/ingestion_service.py index f1a45cb86..870f87713 100644 --- a/py/core/main/services/ingestion_service.py +++ b/py/core/main/services/ingestion_service.py @@ -3,21 +3,22 @@ import logging import uuid from datetime import datetime -from typing import Any, AsyncGenerator, Optional, Sequence, Union +from typing import Any, AsyncGenerator, Optional, Sequence from uuid import UUID from fastapi import HTTPException from core.base import ( Document, - DocumentExtraction, - DocumentInfo, + DocumentChunk, + DocumentResponse, DocumentType, GenerationConfig, IngestionStatus, R2RException, RawChunk, RunManager, + UnprocessedChunk, Vector, VectorEntry, VectorType, @@ -30,7 +31,7 @@ IndexMethod, VectorTableName, ) -from core.base.api.models import UserResponse +from core.base.api.models import User from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider from core.telemetry.telemetry_decorator import telemetry_event @@ -70,7 +71,7 @@ def __init__( async def ingest_file_ingress( self, file_data: dict, - user: UserResponse, + user: User, document_id: UUID, size_in_bytes, metadata: Optional[dict] = None, @@ -104,7 +105,9 @@ async def ingest_file_ingress( ) existing_document_info = ( - await self.providers.database.get_documents_overview( + await self.providers.database.get_documents_overview( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + offset=0, + limit=100, filter_user_ids=[user.id], filter_document_ids=[document_id], ) @@ -119,12 +122,12 @@ async def ingest_file_ingress( ): raise R2RException( status_code=409, - message=f"Must increment version number before attempting to overwrite document {document_id}. Use the `update_files` endpoint if you are looking to update the existing version.", + message=f"Document {document_id} already exists. 
Submit a DELETE request to `/documents/{document_id}` to delete this document and allow for re-ingestion.", ) elif existing_doc.ingestion_status != IngestionStatus.FAILED: raise R2RException( status_code=409, - message=f"Document {document_id} was already ingested and is not in a failed state.", + message=f"Document {document_id} is currently ingesting.", ) await self.providers.database.upsert_documents_overview( @@ -145,13 +148,15 @@ async def ingest_file_ingress( def _create_document_info_from_file( self, document_id: UUID, - user: UserResponse, + user: User, file_name: str, metadata: dict, version: str, size_in_bytes: int, - ) -> DocumentInfo: - file_extension = file_name.split(".")[-1].lower() + ) -> DocumentResponse: + file_extension = ( + file_name.split(".")[-1].lower() if file_name != "N/A" else "txt" + ) if file_extension.upper() not in DocumentType.__members__: raise R2RException( status_code=415, @@ -161,12 +166,16 @@ def _create_document_info_from_file( metadata = metadata or {} metadata["version"] = version - return DocumentInfo( + return DocumentResponse( id=document_id, - user_id=user.id, + owner_id=user.id, collection_ids=metadata.get("collection_ids", []), document_type=DocumentType[file_extension.upper()], - title=metadata.get("title", file_name.split("/")[-1]), + title=( + metadata.get("title", file_name.split("/")[-1]) + if file_name != "N/A" + else "N/A" + ), metadata=metadata, version=version, size_in_bytes=size_in_bytes, @@ -178,17 +187,17 @@ def _create_document_info_from_file( def _create_document_info_from_chunks( self, document_id: UUID, - user: UserResponse, + user: User, chunks: list[RawChunk], metadata: dict, version: str, - ) -> DocumentInfo: + ) -> DocumentResponse: metadata = metadata or {} metadata["version"] = version - return DocumentInfo( + return DocumentResponse( id=document_id, - user_id=user.id, + owner_id=user.id, collection_ids=metadata.get("collection_ids", []), document_type=DocumentType.TXT, title=metadata.get("title", f"Ingested Chunks - {document_id}"), @@ -203,14 +212,14 @@ def _create_document_info_from_chunks( ) async def parse_file( - self, document_info: DocumentInfo, ingestion_config: dict - ) -> AsyncGenerator[DocumentExtraction, None]: + self, document_info: DocumentResponse, ingestion_config: dict + ) -> AsyncGenerator[DocumentChunk, None]: return await self.pipes.parsing_pipe.run( input=self.pipes.parsing_pipe.Input( message=Document( id=document_info.id, collection_ids=document_info.collection_ids, - user_id=document_info.user_id, + owner_id=document_info.owner_id, metadata={ "document_type": document_info.document_type.value, **document_info.metadata, @@ -225,7 +234,7 @@ async def parse_file( async def augment_document_info( self, - document_info: DocumentInfo, + document_info: DocumentResponse, chunked_documents: list[dict], ) -> None: if not self.config.ingestion.skip_document_summary: @@ -244,9 +253,12 @@ async def augment_document_info( task_prompt_name=self.config.ingestion.document_summary_task_prompt, task_inputs={"document": document}, ) + # FIXME: Why are we hardcoding the model here? 
response = await self.providers.llm.aget_completion( messages=messages, - generation_config=GenerationConfig(model="openai/gpt-4o-mini"), + generation_config=GenerationConfig( + model=self.config.ingestion.document_summary_model + ), ) document_info.summary = response.choices[0].message.content # type: ignore @@ -267,7 +279,7 @@ async def embed_document( return await self.pipes.embedding_pipe.run( input=self.pipes.embedding_pipe.Input( message=[ - DocumentExtraction.from_dict(chunk) + DocumentChunk.from_dict(chunk) for chunk in chunked_documents ] ), @@ -277,7 +289,7 @@ async def embed_document( async def store_embeddings( self, - embeddings: Sequence[Union[dict, VectorEntry]], + embeddings: Sequence[dict | VectorEntry], ) -> AsyncGenerator[str, None]: vector_entries = [ ( @@ -296,7 +308,7 @@ async def store_embeddings( async def finalize_ingestion( self, - document_info: DocumentInfo, + document_info: DocumentResponse, is_update: bool = False, ) -> None: if is_update: @@ -320,13 +332,15 @@ async def empty_generator(): async def update_document_status( self, - document_info: DocumentInfo, + document_info: DocumentResponse, status: IngestionStatus, ) -> None: document_info.ingestion_status = status await self._update_document_status_in_db(document_info) - async def _update_document_status_in_db(self, document_info: DocumentInfo): + async def _update_document_status_in_db( + self, document_info: DocumentResponse + ): try: await self.providers.database.upsert_documents_overview( document_info @@ -348,10 +362,10 @@ async def ingest_chunks_ingress( document_id: UUID, metadata: Optional[dict], chunks: list[RawChunk], - user: UserResponse, + user: User, *args: Any, **kwargs: Any, - ) -> DocumentInfo: + ) -> DocumentResponse: if not chunks: raise R2RException( status_code=400, message="No chunks provided for ingestion." @@ -369,7 +383,9 @@ async def ingest_chunks_ingress( ) existing_document_info = ( - await self.providers.database.get_documents_overview( + await self.providers.database.get_documents_overview( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + offset=0, + limit=100, filter_user_ids=[user.id], filter_document_ids=[document_id], ) @@ -391,33 +407,35 @@ async def ingest_chunks_ingress( async def update_chunk_ingress( self, document_id: UUID, - extraction_id: UUID, + chunk_id: UUID, text: str, - user: UserResponse, + user: User, metadata: Optional[dict] = None, *args: Any, **kwargs: Any, ) -> dict: # Verify chunk exists and user has access - existing_chunks = await self.providers.database.get_document_chunks( - document_id=document_id, limit=1 + existing_chunks = await self.providers.database.list_document_chunks( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. 
+ document_id=document_id, + offset=0, + limit=1, ) if not existing_chunks["results"]: raise R2RException( status_code=404, - message=f"Chunk with extraction_id {extraction_id} not found.", + message=f"Chunk with chunk_id {chunk_id} not found.", ) - existing_chunk = await self.providers.database.get_chunk(extraction_id) + existing_chunk = await self.providers.database.get_chunk(chunk_id) if not existing_chunk: raise R2RException( status_code=404, - message=f"Chunk with id {extraction_id} not found", + message=f"Chunk with id {chunk_id} not found", ) if ( - str(existing_chunk["user_id"]) != str(user.id) + str(existing_chunk["owner_id"]) != str(user.id) and not user.is_superuser ): raise R2RException( @@ -436,17 +454,17 @@ async def update_chunk_ingress( # Create updated extraction extraction_data = { - "id": extraction_id, + "id": chunk_id, "document_id": document_id, "collection_ids": kwargs.get( "collection_ids", existing_chunk["collection_ids"] ), - "user_id": existing_chunk["user_id"], + "owner_id": existing_chunk["owner_id"], "data": text or existing_chunk["text"], "metadata": merged_metadata, } - extraction = DocumentExtraction(**extraction_data).model_dump() + extraction = DocumentChunk(**extraction_data).model_dump() embedding_generator = await self.embed_document([extraction]) embeddings = [ @@ -465,7 +483,7 @@ async def _get_enriched_chunk_text( chunk: dict, document_id: UUID, chunk_enrichment_settings: ChunkEnrichmentSettings, - document_chunks: list[dict], + list_document_chunks: list[dict], document_chunks_dict: dict, ) -> VectorEntry: # get chunks in context @@ -473,29 +491,29 @@ async def _get_enriched_chunk_text( for enrichment_strategy in chunk_enrichment_settings.strategies: if enrichment_strategy == ChunkEnrichmentStrategy.NEIGHBORHOOD: context_chunk_ids.extend( - document_chunks[chunk_idx - prev]["extraction_id"] + list_document_chunks[chunk_idx - prev]["chunk_id"] for prev in range( 1, chunk_enrichment_settings.backward_chunks + 1 ) if chunk_idx - prev >= 0 ) context_chunk_ids.extend( - document_chunks[chunk_idx + next]["extraction_id"] + list_document_chunks[chunk_idx + next]["chunk_id"] for next in range( 1, chunk_enrichment_settings.forward_chunks + 1 ) - if chunk_idx + next < len(document_chunks) + if chunk_idx + next < len(list_document_chunks) ) elif enrichment_strategy == ChunkEnrichmentStrategy.SEMANTIC: semantic_neighbors = await self.providers.database.get_semantic_neighbors( - document_id=document_id, - chunk_id=chunk["extraction_id"], + offset=0, limit=chunk_enrichment_settings.semantic_neighbors, + document_id=document_id, + chunk_id=chunk["chunk_id"], similarity_threshold=chunk_enrichment_settings.semantic_similarity_threshold, ) context_chunk_ids.extend( - neighbor["extraction_id"] - for neighbor in semantic_neighbors + neighbor["chunk_id"] for neighbor in semantic_neighbors ) # weird behavior, sometimes we get UUIDs @@ -565,12 +583,10 @@ async def _get_enriched_chunk_text( chunk["metadata"]["original_text"] = chunk["text"] return VectorEntry( - extraction_id=uuid.uuid5( - uuid.NAMESPACE_DNS, str(chunk["extraction_id"]) - ), + id=uuid.uuid5(uuid.NAMESPACE_DNS, str(chunk["chunk_id"])), vector=Vector(data=data, type=VectorType.FIXED, length=len(data)), document_id=document_id, - user_id=chunk["user_id"], + owner_id=chunk["owner_id"], collection_ids=chunk["collection_ids"], text=updated_chunk_text or chunk["text"], metadata=chunk["metadata"], @@ -583,28 +599,34 @@ async def chunk_enrichment( ) -> int: # just call the pipe on every chunk of the document - # 
get all document_chunks - document_chunks = ( - await self.providers.database.get_document_chunks( + # TODO: Why is the config not recognized as an ingestionconfig but as a providerconfig? + chunk_enrichment_settings = ( + self.providers.ingestion.config.chunk_enrichment_settings # type: ignore + ) + # get all list_document_chunks + list_document_chunks = ( + await self.providers.database.list_document_chunks( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. document_id=document_id, + offset=0, + limit=100, ) )["results"] new_vector_entries = [] document_chunks_dict = { - chunk["extraction_id"]: chunk for chunk in document_chunks + chunk["chunk_id"]: chunk for chunk in list_document_chunks } tasks = [] total_completed = 0 - for chunk_idx, chunk in enumerate(document_chunks): + for chunk_idx, chunk in enumerate(list_document_chunks): tasks.append( self._get_enriched_chunk_text( chunk_idx, chunk, document_id, chunk_enrichment_settings, - document_chunks, + list_document_chunks, document_chunks_dict, ) ) @@ -613,13 +635,13 @@ async def chunk_enrichment( new_vector_entries.extend(await asyncio.gather(*tasks)) total_completed += 128 logger.info( - f"Completed {total_completed} out of {len(document_chunks)} chunks for document {document_id}" + f"Completed {total_completed} out of {len(list_document_chunks)} chunks for document {document_id}" ) tasks = [] new_vector_entries.extend(await asyncio.gather(*tasks)) logger.info( - f"Completed enrichment of {len(document_chunks)} chunks for document {document_id}" + f"Completed enrichment of {len(list_document_chunks)} chunks for document {document_id}" ) # delete old chunks from vector db @@ -634,18 +656,45 @@ async def chunk_enrichment( return len(new_vector_entries) + # TODO - This should return a typed object + async def list_chunks( + self, + offset: int, + limit: int, + filters: Optional[dict[str, Any]] = None, + include_vectors: bool = False, + *args: Any, + **kwargs: Any, + ) -> dict: + return await self.providers.database.list_chunks( + offset=offset, + limit=limit, + filters=filters, + include_vectors=include_vectors, + ) + + # TODO - This should return a typed object + async def get_chunk( + self, + # document_id: UUID, + chunk_id: UUID, + *args: Any, + **kwargs: Any, + ) -> dict: + return await self.providers.database.get_chunk(chunk_id) + async def update_document_metadata( self, document_id: UUID, metadata: dict, - user: UserResponse, + user: User, ) -> None: # Verify document exists and user has access - existing_document = ( - await self.providers.database.get_documents_overview( - filter_document_ids=[document_id], - filter_user_ids=[user.id], - ) + existing_document = await self.providers.database.get_documents_overview( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. 
+ offset=0, + limit=100, + filter_document_ids=[document_id], + filter_user_ids=[user.id], ) if not existing_document["results"]: @@ -671,7 +720,7 @@ async def update_document_metadata( class IngestionServiceAdapter: @staticmethod - def _parse_user_data(user_data) -> UserResponse: + def _parse_user_data(user_data) -> User: if isinstance(user_data, str): try: user_data = json.loads(user_data) @@ -679,7 +728,7 @@ def _parse_user_data(user_data) -> UserResponse: raise ValueError( f"Invalid user data format: {user_data}" ) from e - return UserResponse.from_dict(user_data) + return User.from_dict(user_data) @staticmethod def _parse_chunk_enrichment_settings( @@ -718,7 +767,10 @@ def parse_ingest_chunks_input(data: dict) -> dict: "user": IngestionServiceAdapter._parse_user_data(data["user"]), "metadata": data["metadata"], "document_id": data["document_id"], - "chunks": [RawChunk.from_dict(chunk) for chunk in data["chunks"]], + "chunks": [ + UnprocessedChunk.from_dict(chunk) for chunk in data["chunks"] + ], + "id": data.get("id"), } @staticmethod @@ -726,7 +778,7 @@ def parse_update_chunk_input(data: dict) -> dict: return { "user": IngestionServiceAdapter._parse_user_data(data["user"]), "document_id": UUID(data["document_id"]), - "extraction_id": UUID(data["extraction_id"]), + "id": UUID(data["id"]), "text": data["text"], "metadata": data.get("metadata"), "collection_ids": data.get("collection_ids", []), diff --git a/py/core/main/services/kg_service.py b/py/core/main/services/kg_service.py index c80e60ecc..48e66160b 100644 --- a/py/core/main/services/kg_service.py +++ b/py/core/main/services/kg_service.py @@ -1,20 +1,35 @@ +import asyncio +import json import logging import math +import re import time -from typing import AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional, Union from uuid import UUID from fastapi import HTTPException -from core.base import KGExtractionStatus, RunManager +from core.base import ( + DocumentChunk, + KGExtraction, + KGExtractionStatus, + R2RDocumentProcessingError, + RunManager, +) from core.base.abstractions import ( + Community, + Entity, GenerationConfig, + Graph, KGCreationSettings, KGEnrichmentSettings, + KGEnrichmentStatus, KGEntityDeduplicationSettings, KGEntityDeduplicationType, R2RException, + Relationship, ) +from core.base.api.models import GraphResponse from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider from core.telemetry.telemetry_decorator import telemetry_event @@ -25,6 +40,9 @@ logger = logging.getLogger() +MIN_VALID_KG_EXTRACTION_RESPONSE_LENGTH = 128 + + async def _collect_results(result_gen: AsyncGenerator) -> list[dict]: results = [] async for res in result_gen: @@ -55,13 +73,13 @@ def __init__( logging_connection, ) - @telemetry_event("kg_triples_extraction") - async def kg_triples_extraction( + @telemetry_event("kg_relationships_extraction") + async def kg_relationships_extraction( self, document_id: UUID, generation_config: GenerationConfig, - extraction_merge_count: int, - max_knowledge_triples: int, + chunk_merge_count: int, + max_knowledge_relationships: int, entity_types: list[str], relation_types: list[str], **kwargs, @@ -74,17 +92,17 @@ async def kg_triples_extraction( await self.providers.database.set_workflow_status( id=document_id, - status_type="kg_extraction_status", + status_type="extraction_status", status=KGExtractionStatus.PROCESSING, ) - triples = await self.pipes.kg_triples_extraction_pipe.run( - input=self.pipes.kg_triples_extraction_pipe.Input( + relationships = await 
self.pipes.kg_relationships_extraction_pipe.run( + input=self.pipes.kg_relationships_extraction_pipe.Input( message={ "document_id": document_id, "generation_config": generation_config, - "extraction_merge_count": extraction_merge_count, - "max_knowledge_triples": max_knowledge_triples, + "chunk_merge_count": chunk_merge_count, + "max_knowledge_relationships": max_knowledge_relationships, "entity_types": entity_types, "relation_types": relation_types, "logger": logger, @@ -99,7 +117,7 @@ async def kg_triples_extraction( ) result_gen = await self.pipes.kg_storage_pipe.run( - input=self.pipes.kg_storage_pipe.Input(message=triples), + input=self.pipes.kg_storage_pipe.Input(message=relationships), state=None, run_manager=self.run_manager, ) @@ -108,18 +126,353 @@ async def kg_triples_extraction( logger.error(f"KGService: Error in kg_extraction: {e}") await self.providers.database.set_workflow_status( id=document_id, - status_type="kg_extraction_status", + status_type="extraction_status", status=KGExtractionStatus.FAILED, ) raise e return await _collect_results(result_gen) + @telemetry_event("create_entity") + async def create_entity( + self, + name: str, + description: str, + parent_id: UUID, + category: Optional[str] = None, + metadata: Optional[dict] = None, + ) -> Entity: + + description_embedding = str( + await self.providers.embedding.async_get_embedding(description) + ) + + return await self.providers.database.graph_handler.entities.create( + name=name, + parent_id=parent_id, + store_type="graphs", # type: ignore + category=category, + description=description, + description_embedding=description_embedding, + metadata=metadata, + ) + + @telemetry_event("update_entity") + async def update_entity( + self, + entity_id: UUID, + name: Optional[str] = None, + description: Optional[str] = None, + category: Optional[str] = None, + metadata: Optional[dict] = None, + ) -> Entity: + + description_embedding = None + if description is not None: + description_embedding = str( + await self.providers.embedding.async_get_embedding(description) + ) + + return await self.providers.database.graph_handler.entities.update( + entity_id=entity_id, + store_type="graphs", # type: ignore + name=name, + description=description, + description_embedding=description_embedding, + category=category, + metadata=metadata, + ) + + @telemetry_event("delete_entity") + async def delete_entity( + self, + parent_id: UUID, + entity_id: UUID, + ): + return await self.providers.database.graph_handler.entities.delete( + parent_id=parent_id, + entity_ids=[entity_id], + store_type="graphs", # type: ignore + ) + + @telemetry_event("get_entities") + async def get_entities( + self, + parent_id: UUID, + offset: int, + limit: int, + entity_ids: Optional[list[UUID]] = None, + entity_names: Optional[list[str]] = None, + include_embeddings: bool = False, + ): + return await self.providers.database.graph_handler.get_entities( + parent_id=parent_id, + offset=offset, + limit=limit, + entity_ids=entity_ids, + entity_names=entity_names, + include_embeddings=include_embeddings, + ) + + @telemetry_event("create_relationship") + async def create_relationship( + self, + subject: str, + subject_id: UUID, + predicate: str, + object: str, + object_id: UUID, + parent_id: UUID, + description: str | None = None, + weight: float | None = 1.0, + metadata: Optional[dict[str, Any] | str] = None, + ) -> Relationship: + description_embedding = None + if description: + description_embedding = str( + await self.providers.embedding.async_get_embedding(description) + 
) + + return ( + await self.providers.database.graph_handler.relationships.create( + subject=subject, + subject_id=subject_id, + predicate=predicate, + object=object, + object_id=object_id, + parent_id=parent_id, + description=description, + description_embedding=description_embedding, + weight=weight, + metadata=metadata, + store_type="graphs", # type: ignore + ) + ) + + @telemetry_event("delete_relationship") + async def delete_relationship( + self, + parent_id: UUID, + relationship_id: UUID, + ): + return ( + await self.providers.database.graph_handler.relationships.delete( + parent_id=parent_id, + relationship_ids=[relationship_id], + store_type="graphs", # type: ignore + ) + ) + + @telemetry_event("update_relationship") + async def update_relationship( + self, + relationship_id: UUID, + subject: Optional[str] = None, + subject_id: Optional[UUID] = None, + predicate: Optional[str] = None, + object: Optional[str] = None, + object_id: Optional[UUID] = None, + description: Optional[str] = None, + weight: Optional[float] = None, + metadata: Optional[dict[str, Any] | str] = None, + ) -> Relationship: + + description_embedding = None + if description is not None: + description_embedding = str( + await self.providers.embedding.async_get_embedding(description) + ) + + return ( + await self.providers.database.graph_handler.relationships.update( + relationship_id=relationship_id, + subject=subject, + subject_id=subject_id, + predicate=predicate, + object=object, + object_id=object_id, + description=description, + description_embedding=description_embedding, + weight=weight, + metadata=metadata, + store_type="graphs", # type: ignore + ) + ) + + @telemetry_event("get_relationships") + async def get_relationships( + self, + parent_id: UUID, + offset: int, + limit: int, + relationship_ids: Optional[list[UUID]] = None, + entity_names: Optional[list[str]] = None, + ): + return await self.providers.database.graph_handler.relationships.get( + parent_id=parent_id, + store_type="graphs", # type: ignore + offset=offset, + limit=limit, + relationship_ids=relationship_ids, + entity_names=entity_names, + ) + + @telemetry_event("create_community") + async def create_community( + self, + parent_id: UUID, + name: str, + summary: str, + findings: Optional[list[str]], + rating: Optional[float], + rating_explanation: Optional[str], + ) -> Community: + description_embedding = str( + await self.providers.embedding.async_get_embedding(summary) + ) + return await self.providers.database.graph_handler.communities.create( + parent_id=parent_id, + store_type="graphs", # type: ignore + name=name, + summary=summary, + description_embedding=description_embedding, + findings=findings, + rating=rating, + rating_explanation=rating_explanation, + ) + + @telemetry_event("update_community") + async def update_community( + self, + community_id: UUID, + name: Optional[str], + summary: Optional[str], + findings: Optional[list[str]], + rating: Optional[float], + rating_explanation: Optional[str], + ) -> Community: + summary_embedding = None + if summary is not None: + summary_embedding = str( + await self.providers.embedding.async_get_embedding(summary) + ) + + return await self.providers.database.graph_handler.communities.update( + community_id=community_id, + store_type="graphs", # type: ignore + name=name, + summary=summary, + summary_embedding=summary_embedding, + findings=findings, + rating=rating, + rating_explanation=rating_explanation, + ) + + @telemetry_event("delete_community") + async def delete_community( + self, + 
parent_id: UUID, + community_id: UUID, + ) -> None: + await self.providers.database.graph_handler.communities.delete( + parent_id=parent_id, + community_id=community_id, + ) + + @telemetry_event("list_communities") + async def list_communities( + self, + collection_id: UUID, + offset: int, + limit: int, + ): + return await self.providers.database.graph_handler.communities.get( + parent_id=collection_id, + store_type="graphs", # type: ignore + offset=offset, + limit=limit, + ) + + @telemetry_event("get_communities") + async def get_communities( + self, + parent_id: UUID, + offset: int, + limit: int, + community_ids: Optional[list[UUID]] = None, + community_names: Optional[list[str]] = None, + include_embeddings: bool = False, + ): + return await self.providers.database.graph_handler.get_communities( + parent_id=parent_id, + offset=offset, + limit=limit, + community_ids=community_ids, + include_embeddings=include_embeddings, + ) + + # @telemetry_event("create_new_graph") + # async def create_new_graph( + # self, + # collection_id: UUID, + # user_id: UUID, + # name: Optional[str], + # description: str = "", + # ) -> GraphResponse: + # return await self.providers.database.graph_handler.create( + # collection_id=collection_id, + # user_id=user_id, + # name=name, + # description=description, + # graph_id=collection_id, + # ) + + async def list_graphs( + self, + offset: int, + limit: int, + # user_ids: Optional[list[UUID]] = None, + graph_ids: Optional[list[UUID]] = None, + collection_id: Optional[UUID] = None, + ) -> dict[str, list[GraphResponse] | int]: + return await self.providers.database.graph_handler.list_graphs( + offset=offset, + limit=limit, + # filter_user_ids=user_ids, + filter_graph_ids=graph_ids, + filter_collection_id=collection_id, + ) + + @telemetry_event("update_graph") + async def update_graph( + self, + collection_id: UUID, + name: Optional[str] = None, + description: Optional[str] = None, + ) -> GraphResponse: + return await self.providers.database.graph_handler.update( + collection_id=collection_id, + name=name, + description=description, + ) + + @telemetry_event("reset_graph_v3") + async def reset_graph_v3(self, id: UUID) -> bool: + await self.providers.database.graph_handler.reset( + parent_id=id, + ) + await self.providers.database.document_handler.set_workflow_status( + id=id, + status_type="graph_cluster_status", + status=KGEnrichmentStatus.PENDING, + ) + return True + @telemetry_event("get_document_ids_for_create_graph") async def get_document_ids_for_create_graph( self, collection_id: UUID, - force_kg_creation: bool, + force_kg_creation: bool = False, **kwargs, ): @@ -132,16 +485,12 @@ async def get_document_ids_for_create_graph( KGExtractionStatus.PROCESSING, ] - document_ids = ( - await self.providers.database.get_document_ids_by_status( - status_type="kg_extraction_status", - status=[str(ele) for ele in document_status_filter], - collection_id=collection_id, - ) + return await self.providers.database.get_document_ids_by_status( + status_type="extraction_status", + status=[str(ele) for ele in document_status_filter], + collection_id=collection_id, ) - return document_ids - @telemetry_event("kg_entity_description") async def kg_entity_description( self, @@ -156,10 +505,12 @@ async def kg_entity_description( f"KGService: Running kg_entity_description for document {document_id}" ) - entity_count = await self.providers.database.get_entity_count( - document_id=document_id, - distinct=True, - entity_table_name="chunk_entity", + entity_count = ( + await 
self.providers.database.graph_handler.get_entity_count( + document_id=document_id, + distinct=True, + entity_table_name="documents_entities", + ) ) logger.info( @@ -202,7 +553,7 @@ async def kg_entity_description( await self.providers.database.set_workflow_status( id=document_id, - status_type="kg_extraction_status", + status_type="extraction_status", status=KGExtractionStatus.SUCCESS, ) @@ -216,6 +567,7 @@ async def kg_entity_description( async def kg_clustering( self, collection_id: UUID, + # graph_id: UUID, generation_config: GenerationConfig, leiden_params: dict, **kwargs, @@ -224,6 +576,7 @@ async def kg_clustering( logger.info( f"Running ClusteringPipe for collection {collection_id} with settings {leiden_params}" ) + clustering_result = await self.pipes.kg_clustering_pipe.run( input=self.pipes.kg_clustering_pipe.Input( message={ @@ -245,7 +598,8 @@ async def kg_community_summary( limit: int, max_summary_input_length: int, generation_config: GenerationConfig, - collection_id: UUID, + collection_id: UUID | None, + # graph_id: UUID | None, **kwargs, ): summary_results = await self.pipes.kg_community_summary_pipe.run( @@ -256,6 +610,7 @@ async def kg_community_summary( "generation_config": generation_config, "max_summary_input_length": max_summary_input_length, "collection_id": collection_id, + # "graph_id": graph_id, "logger": logger, } ), @@ -273,6 +628,17 @@ async def delete_graph_for_documents( # TODO: Implement this, as it needs some checks. raise NotImplementedError + @telemetry_event("delete_graph") + async def delete_graph( + self, + collection_id: UUID, + cascade: bool, + **kwargs, + ): + return await self.delete_graph_for_collection( + collection_id=collection_id, cascade=cascade + ) + @telemetry_event("delete_graph_for_collection") async def delete_graph_for_collection( self, @@ -280,8 +646,9 @@ async def delete_graph_for_collection( cascade: bool, **kwargs, ): - return await self.providers.database.delete_graph_for_collection( - collection_id, cascade + return await self.providers.database.graph_handler.delete_graph_for_collection( + collection_id=collection_id, + cascade=cascade, ) @telemetry_event("delete_node_via_document_id") @@ -291,85 +658,45 @@ async def delete_node_via_document_id( collection_id: UUID, **kwargs, ): - return await self.providers.database.delete_node_via_document_id( - collection_id, document_id + return await self.providers.database.graph_handler.delete_node_via_document_id( + document_id=document_id, + collection_id=collection_id, ) @telemetry_event("get_creation_estimate") async def get_creation_estimate( self, - collection_id: UUID, - kg_creation_settings: KGCreationSettings, + graph_creation_settings: KGCreationSettings, + document_id: Optional[UUID] = None, + collection_id: Optional[UUID] = None, **kwargs, ): - return await self.providers.database.get_creation_estimate( - collection_id, kg_creation_settings + return ( + await self.providers.database.graph_handler.get_creation_estimate( + document_id=document_id, + collection_id=collection_id, + graph_creation_settings=graph_creation_settings, + ) ) @telemetry_event("get_enrichment_estimate") async def get_enrichment_estimate( - self, - collection_id: UUID, - kg_enrichment_settings: KGEnrichmentSettings, - **kwargs, - ): - - return await self.providers.database.get_enrichment_estimate( - collection_id, kg_enrichment_settings - ) - - @telemetry_event("get_entities") - async def get_entities( self, collection_id: Optional[UUID] = None, - entity_ids: Optional[list[str]] = None, - entity_table_name: 
str = "document_entity", - offset: Optional[int] = None, - limit: Optional[int] = None, + graph_id: Optional[UUID] = None, + graph_enrichment_settings: KGEnrichmentSettings = KGEnrichmentSettings(), **kwargs, ): - return await self.providers.database.get_entities( - collection_id=collection_id, - entity_ids=entity_ids, - entity_table_name=entity_table_name, - offset=offset or 0, - limit=limit or -1, - ) - @telemetry_event("get_triples") - async def get_triples( - self, - collection_id: Optional[UUID] = None, - entity_names: Optional[list[str]] = None, - triple_ids: Optional[list[str]] = None, - offset: Optional[int] = None, - limit: Optional[int] = None, - **kwargs, - ): - return await self.providers.database.get_triples( - collection_id=collection_id, - entity_names=entity_names, - triple_ids=triple_ids, - offset=offset or 0, - limit=limit or -1, - ) + if graph_id is None and collection_id is None: + raise ValueError( + "Either graph_id or collection_id must be provided" + ) - @telemetry_event("get_communities") - async def get_communities( - self, - collection_id: Optional[UUID] = None, - levels: Optional[list[int]] = None, - community_numbers: Optional[list[int]] = None, - offset: Optional[int] = None, - limit: Optional[int] = None, - **kwargs, - ): - return await self.providers.database.get_communities( + return await self.providers.database.graph_handler.get_enrichment_estimate( collection_id=collection_id, - levels=levels, - community_numbers=community_numbers, - offset=offset or 0, - limit=limit or -1, + graph_id=graph_id, + graph_enrichment_settings=graph_enrichment_settings, ) @telemetry_event("get_deduplication_estimate") @@ -379,16 +706,18 @@ async def get_deduplication_estimate( kg_deduplication_settings: KGEntityDeduplicationSettings, **kwargs, ): - return await self.providers.database.get_deduplication_estimate( - collection_id, kg_deduplication_settings + return await self.providers.database.graph_handler.get_deduplication_estimate( + collection_id=collection_id, + kg_deduplication_settings=kg_deduplication_settings, ) @telemetry_event("kg_entity_deduplication") async def kg_entity_deduplication( self, collection_id: UUID, - kg_entity_deduplication_type: KGEntityDeduplicationType, - kg_entity_deduplication_prompt: str, + graph_id: UUID, + graph_entity_deduplication_type: KGEntityDeduplicationType, + graph_entity_deduplication_prompt: str, generation_config: GenerationConfig, **kwargs, ): @@ -396,8 +725,9 @@ async def kg_entity_deduplication( input=self.pipes.kg_entity_deduplication_pipe.Input( message={ "collection_id": collection_id, - "kg_entity_deduplication_type": kg_entity_deduplication_type, - "kg_entity_deduplication_prompt": kg_entity_deduplication_prompt, + "graph_id": graph_id, + "graph_entity_deduplication_type": graph_entity_deduplication_type, + "graph_entity_deduplication_prompt": graph_entity_deduplication_prompt, "generation_config": generation_config, **kwargs, } @@ -413,8 +743,8 @@ async def kg_entity_deduplication_summary( collection_id: UUID, offset: int, limit: int, - kg_entity_deduplication_type: KGEntityDeduplicationType, - kg_entity_deduplication_prompt: str, + graph_entity_deduplication_type: KGEntityDeduplicationType, + graph_entity_deduplication_prompt: str, generation_config: GenerationConfig, **kwargs, ): @@ -428,8 +758,8 @@ async def kg_entity_deduplication_summary( "collection_id": collection_id, "offset": offset, "limit": limit, - "kg_entity_deduplication_type": kg_entity_deduplication_type, - "kg_entity_deduplication_prompt": 
kg_entity_deduplication_prompt, + "graph_entity_deduplication_type": graph_entity_deduplication_type, + "graph_entity_deduplication_prompt": graph_entity_deduplication_prompt, "generation_config": generation_config, } ), @@ -473,7 +803,7 @@ async def tune_prompt( for document in documents: chunks_response = ( - await self.providers.database.get_document_chunks( + await self.providers.database.list_document_chunks( document.id, offset=chunks_offset, limit=chunks_limit, @@ -520,3 +850,325 @@ async def tune_prompt( ) return results[0] + + async def kg_extraction( # type: ignore + self, + document_id: UUID, + generation_config: GenerationConfig, + max_knowledge_relationships: int, + entity_types: list[str], + relation_types: list[str], + chunk_merge_count: int, + filter_out_existing_chunks: bool = True, + total_tasks: Optional[int] = None, + *args: Any, + **kwargs: Any, + ) -> AsyncGenerator[Union[KGExtraction, R2RDocumentProcessingError], None]: + start_time = time.time() + + logger.info( + f"KGExtractionPipe: Processing document {document_id} for KG extraction", + ) + + # Page through the document's chunks and convert them into DocumentChunk extractions + limit = 100 + offset = 0 + chunks = [] + while True: + chunk_req = await self.providers.database.list_document_chunks( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + document_id=document_id, + offset=offset, + limit=limit, + ) + + chunks.extend( + [ + DocumentChunk( + id=chunk["id"], + document_id=chunk["document_id"], + owner_id=chunk["owner_id"], + collection_ids=chunk["collection_ids"], + data=chunk["text"], + metadata=chunk["metadata"], + ) + for chunk in chunk_req["results"] + ] + ) + if len(chunk_req["results"]) < limit: + break + offset += limit + + logger.info(f"Found {len(chunks)} chunks for document {document_id}") + if len(chunks) == 0: + logger.info(f"No chunks found for document {document_id}") + raise R2RException( + message="No chunks found for document", + status_code=404, + ) + + if filter_out_existing_chunks: + existing_chunk_ids = await self.providers.database.graph_handler.get_existing_document_entity_chunk_ids( + document_id=document_id + ) + chunks = [ + chunk for chunk in chunks if chunk.id not in existing_chunk_ids + ] + logger.info( + f"Filtered out {len(existing_chunk_ids)} existing chunks, remaining {len(chunks)} chunks for document {document_id}" + ) + + if len(chunks) == 0: + logger.info(f"No extractions left for document {document_id}") + return + + logger.info( + f"KGExtractionPipe: Obtained {len(chunks)} chunks to process, time from start: {time.time() - start_time:.2f} seconds", + ) + + # sort the extractions according to the chunk_order field in metadata, in ascending order + chunks = sorted( + chunks, + key=lambda x: x.metadata.get("chunk_order", float("inf")), + ) + + # group these extractions into groups of chunk_merge_count + grouped_chunks = [ + chunks[i : i + chunk_merge_count] + for i in range(0, len(chunks), chunk_merge_count) + ] + + logger.info( + f"KGExtractionPipe: Extracting KG relationships for document, created {len(grouped_chunks)} tasks, time from start: {time.time() - start_time:.2f} seconds", + ) + + tasks = [ + asyncio.create_task( + self._extract_kg( + chunks=chunk_group, + generation_config=generation_config, + max_knowledge_relationships=max_knowledge_relationships, + entity_types=entity_types, + relation_types=relation_types, + task_id=task_id, + total_tasks=len(grouped_chunks), + ) + ) + for task_id, chunk_group in enumerate(grouped_chunks) + ] +
completed_tasks = 0 + total_tasks = len(tasks) + + logger.info( + f"KGExtractionPipe: Waiting for {total_tasks} KG extraction tasks to complete", + ) + + for completed_task in asyncio.as_completed(tasks): + try: + yield await completed_task + completed_tasks += 1 + if completed_tasks % 100 == 0: + logger.info( + f"KGExtractionPipe: Completed {completed_tasks}/{total_tasks} KG extraction tasks", + ) + except Exception as e: + logger.error(f"Error in Extracting KG Relationships: {e}") + yield R2RDocumentProcessingError( + document_id=document_id, + error_message=str(e), + ) + + logger.info( + f"KGExtractionPipe: Completed {completed_tasks}/{total_tasks} KG extraction tasks, time from start: {time.time() - start_time:.2f} seconds", + ) + + async def _extract_kg( + self, + chunks: list[DocumentChunk], + generation_config: GenerationConfig, + max_knowledge_relationships: int, + entity_types: list[str], + relation_types: list[str], + retries: int = 5, + delay: int = 2, + task_id: Optional[int] = None, + total_tasks: Optional[int] = None, + ) -> KGExtraction: + """ + Extracts NER entities and relationships from a group of document chunks, with retries. + """ + + # combine all extractions into a single string + combined_extraction: str = " ".join([chunk.data for chunk in chunks]) # type: ignore + + messages = await self.providers.database.prompt_handler.get_message_payload( + task_prompt_name=self.providers.database.config.graph_creation_settings.graphrag_relationships_extraction_few_shot, + task_inputs={ + "input": combined_extraction, + "max_knowledge_relationships": max_knowledge_relationships, + "entity_types": "\n".join(entity_types), + "relation_types": "\n".join(relation_types), + }, + ) + + for attempt in range(retries): + try: + response = await self.providers.llm.aget_completion( + messages, + generation_config=generation_config, + ) + + kg_extraction = response.choices[0].message.content + + if not kg_extraction: + raise R2RException( + "No knowledge graph extraction found in the response string, the selected LLM likely failed to format its response correctly.", + 400, + ) + + entity_pattern = ( + r'\("entity"\${4}([^$]+)\${4}([^$]+)\${4}([^$]+)\)' + ) + relationship_pattern = r'\("relationship"\${4}([^$]+)\${4}([^$]+)\${4}([^$]+)\${4}([^$]+)\${4}(\d+(?:\.\d+)?)\)' + + async def parse_fn(response_str: str) -> Any: + entities = re.findall(entity_pattern, response_str) + + if ( + len(kg_extraction) + > MIN_VALID_KG_EXTRACTION_RESPONSE_LENGTH + and len(entities) == 0 + ): + raise R2RException( + f"No entities found in the response string, the selected LLM likely failed to format its response correctly.
{response_str}", + 400, + ) + + relationships = re.findall( + relationship_pattern, response_str + ) + + entities_arr = [] + for entity in entities: + entity_value = entity[0] + entity_category = entity[1] + entity_description = entity[2] + description_embedding = ( + await self.providers.embedding.async_get_embedding( + entity_description + ) + ) + entities_arr.append( + Entity( + category=entity_category, + description=entity_description, + name=entity_value, + parent_id=chunks[0].document_id, + chunk_ids=[chunk.id for chunk in chunks], + description_embedding=description_embedding, + attributes={}, + ) + ) + + relations_arr = [] + for relationship in relationships: + subject = relationship[0] + object = relationship[1] + predicate = relationship[2] + description = relationship[3] + weight = float(relationship[4]) + relationship_embedding = ( + await self.providers.embedding.async_get_embedding( + description + ) + ) + + # check if subject and object are in entities_dict + relations_arr.append( + Relationship( + subject=subject, + predicate=predicate, + object=object, + description=description, + weight=weight, + parent_id=chunks[0].document_id, + chunk_ids=[chunk.id for chunk in chunks], + attributes={}, + description_embedding=relationship_embedding, + ) + ) + + return entities_arr, relations_arr + + entities, relationships = await parse_fn(kg_extraction) + return KGExtraction( + entities=entities, + relationships=relationships, + ) + + except ( + Exception, + json.JSONDecodeError, + KeyError, + IndexError, + R2RException, + ) as e: + if attempt < retries - 1: + await asyncio.sleep(delay) + else: + print( + f"Failed after retries with for chunk {chunks[0].id} of document {chunks[0].document_id}: {e}" + ) + + print( + f"KGExtractionPipe: Completed task number {task_id} of {total_tasks} for document {chunks[0].document_id}", + ) + + return KGExtraction( + entities=[], + relationships=[], + ) + + async def store_kg_extractions( + self, + kg_extractions: list[KGExtraction], + ): + """ + Stores a batch of knowledge graph extractions in the graph database. 
+ """ + + for extraction in kg_extractions: + entities_id_map = {} + for entity in extraction.entities: + result = await self.providers.database.graph_handler.entities.create( + name=entity.name, + parent_id=entity.parent_id, + store_type="documents", # type: ignore + category=entity.category, + description=entity.description, + description_embedding=entity.description_embedding, + chunk_ids=entity.chunk_ids, + metadata=entity.metadata, + ) + entities_id_map[entity.name] = result.id + + if extraction.relationships: + + for relationship in extraction.relationships: + await self.providers.database.graph_handler.relationships.create( + subject=relationship.subject, + subject_id=entities_id_map.get( + relationship.subject, None + ), + predicate=relationship.predicate, + object=relationship.object, + object_id=entities_id_map.get( + relationship.object, None + ), + parent_id=relationship.parent_id, + description=relationship.description, + description_embedding=relationship.description_embedding, + weight=relationship.weight, + metadata=relationship.metadata, + store_type="documents", # type: ignore + ) diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index 6004661f6..57fcaae01 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -1,8 +1,7 @@ import logging import os from collections import defaultdict -from importlib.metadata import version as get_version -from typing import Any, BinaryIO, Dict, Optional, Tuple, Union +from typing import Any, BinaryIO, Optional, Tuple from uuid import UUID import toml @@ -11,14 +10,15 @@ from core.base import ( AnalysisTypes, CollectionResponse, - DocumentInfo, + DocumentResponse, + KGEnrichmentStatus, LogFilterCriteria, LogProcessor, Message, Prompt, R2RException, RunManager, - UserResponse, + User, ) from core.base.logger.base import RunType from core.base.utils import validate_uuid @@ -56,8 +56,8 @@ def __init__( @telemetry_event("Logs") async def logs( self, - offset: int = 0, - limit: int = 100, + offset: int, + limit: int, run_type_filter: Optional[RunType] = None, ): if self.logging_connection is None: @@ -192,7 +192,7 @@ async def analytics( } @telemetry_event("AppSettings") - async def app_settings(self, *args: Any, **kwargs: Any): + async def app_settings(self): prompts = await self.providers.database.get_all_prompts() config_toml = self.config.to_toml() config_dict = toml.loads(config_toml) @@ -206,16 +206,16 @@ async def app_settings(self, *args: Any, **kwargs: Any): @telemetry_event("UsersOverview") async def users_overview( self, + offset: int, + limit: int, user_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = 100, *args, **kwargs, ): return await self.providers.database.get_users_overview( - user_ids, offset=offset, limit=limit, + user_ids=user_ids, ) @telemetry_event("Delete") @@ -233,13 +233,16 @@ async def delete( NOTE: This method is not atomic and may result in orphaned entries in the documents overview table. NOTE: This method assumes that filters delete entire contents of any touched documents. 
""" + ### TODO - FIX THIS, ENSURE THAT DOCUMENTS OVERVIEW IS CLEARED def validate_filters(filters: dict[str, Any]) -> None: ALLOWED_FILTERS = { - "document_id", - "user_id", + "id", "collection_ids", - "extraction_id", + "chunk_id", + # TODO - Modify these checks such that they can be used PROPERLY for nested filters + "$and", + "$or", } if not filters: @@ -254,7 +257,7 @@ def validate_filters(filters: dict[str, Any]) -> None: message=f"Invalid filter field: {field}", ) - for field in ["document_id", "user_id", "extraction_id"]: + for field in ["document_id", "user_id", "chunk_id"]: if field in filters: op = next(iter(filters[field].keys())) try: @@ -295,23 +298,44 @@ def validate_filters(filters: dict[str, Any]) -> None: if result.get("document_id") ) - relational_filters = {} - if "document_id" in filters: - relational_filters["filter_document_ids"] = [ - filters["document_id"]["$eq"] - ] - if "user_id" in filters: - relational_filters["filter_user_ids"] = [filters["user_id"]["$eq"]] - if "collection_ids" in filters: - relational_filters["filter_collection_ids"] = list( - filters["collection_ids"]["$in"] - ) + # TODO: This might be appropriate to move elsewhere and revisit filter logic in other methods + def extract_filters(filters: dict[str, Any]) -> dict[str, list[str]]: + relational_filters: dict = {} + + def process_filter(filter_dict: dict[str, Any]): + if "document_id" in filter_dict: + relational_filters.setdefault( + "filter_document_ids", [] + ).append(filter_dict["document_id"]["$eq"]) + if "user_id" in filter_dict: + relational_filters.setdefault( + "filter_user_ids", [] + ).append(filter_dict["user_id"]["$eq"]) + if "collection_ids" in filter_dict: + relational_filters.setdefault( + "filter_collection_ids", [] + ).extend(filter_dict["collection_ids"]["$in"]) + + # Handle nested conditions + if "$and" in filters: + for condition in filters["$and"]: + process_filter(condition) + elif "$or" in filters: + for condition in filters["$or"]: + process_filter(condition) + else: + process_filter(filters) + + return relational_filters + relational_filters = extract_filters(filters) if relational_filters: try: documents_overview = ( - await self.providers.database.get_documents_overview( - **relational_filters # type: ignore + await self.providers.database.get_documents_overview( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + offset=0, + limit=1000, + **relational_filters, # type: ignore ) )["results"] except Exception as e: @@ -331,10 +355,10 @@ def validate_filters(filters: dict[str, Any]) -> None: ) for document_id in document_ids_to_purge: - remaining_chunks = ( - await self.providers.database.get_document_chunks( - document_id - ) + remaining_chunks = await self.providers.database.list_document_chunks( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. 
+ document_id=document_id, + offset=0, + limit=1000, ) if remaining_chunks["total_entries"] == 0: try: @@ -348,6 +372,31 @@ def validate_filters(filters: dict[str, Any]) -> None: logger.error( f"Error deleting document ID {document_id} from documents_overview: {e}" ) + await self.providers.database.graph_handler.entities.delete( + parent_id=document_id, + store_type="documents", # type: ignore + ) + await self.providers.database.graph_handler.relationships.delete( + parent_id=document_id, + store_type="documents", # type: ignore + ) + collections = ( + await self.providers.database.get_collections_overview( + offset=0, limit=1000, filter_document_ids=[document_id] + ) + ) + # TODO - Loop over all collections + for collection in collections["results"]: + await self.providers.database.set_workflow_status( + id=collection.id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.OUTDATED, + ) + await self.providers.database.set_workflow_status( + id=collection.id, + status_type="graph_cluster_status", + status=KGEnrichmentStatus.OUTDATED, + ) return None @@ -362,34 +411,34 @@ async def download_file( @telemetry_event("DocumentsOverview") async def documents_overview( self, + offset: int, + limit: int, user_ids: Optional[list[UUID]] = None, collection_ids: Optional[list[UUID]] = None, document_ids: Optional[list[UUID]] = None, - offset: Optional[int] = None, - limit: Optional[int] = None, *args: Any, **kwargs: Any, ): return await self.providers.database.get_documents_overview( + offset=offset, + limit=limit, filter_document_ids=document_ids, filter_user_ids=user_ids, filter_collection_ids=collection_ids, - offset=offset or 0, - limit=limit or -1, ) @telemetry_event("DocumentChunks") - async def document_chunks( + async def list_document_chunks( self, document_id: UUID, - offset: int = 0, - limit: int = 100, + offset: int, + limit: int, include_vectors: bool = False, *args, **kwargs, ): - return await self.providers.database.get_document_chunks( - document_id, + return await self.providers.database.list_document_chunks( + document_id=document_id, offset=offset, limit=limit, include_vectors=include_vectors, @@ -405,6 +454,17 @@ async def assign_document_to_collection( await self.providers.database.assign_document_to_collection_relational( document_id, collection_id ) + await self.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_sync_status", + status=KGEnrichmentStatus.OUTDATED, + ) + await self.providers.database.set_workflow_status( + id=collection_id, + status_type="graph_cluster_status", + status=KGEnrichmentStatus.OUTDATED, + ) + return {"message": "Document assigned to collection successfully"} @telemetry_event("RemoveDocumentFromCollection") @@ -417,24 +477,16 @@ async def remove_document_from_collection( await self.providers.database.remove_document_from_collection_vector( document_id, collection_id ) - await self.providers.database.delete_node_via_document_id( + await self.providers.database.graph_handler.delete_node_via_document_id( document_id, collection_id ) return None - @telemetry_event("DocumentCollections") - async def document_collections( - self, document_id: UUID, offset: int = 0, limit: int = 100 - ): - return await self.providers.database.document_collections( - document_id, offset=offset, limit=limit - ) - def _process_relationships( self, relationships: list[Tuple[str, str, str]] - ) -> Tuple[Dict[str, list[str]], Dict[str, Dict[str, list[str]]]]: + ) -> Tuple[dict[str, list[str]], dict[str, dict[str, list[str]]]]: graph = 
defaultdict(list) - grouped: Dict[str, Dict[str, list[str]]] = defaultdict( + grouped: dict[str, dict[str, list[str]]] = defaultdict( lambda: defaultdict(list) ) for subject, relation, obj in relationships: @@ -446,9 +498,9 @@ def _process_relationships( def generate_output( self, - grouped_relationships: Dict[str, Dict[str, list[str]]], - graph: Dict[str, list[str]], - descriptions_dict: Dict[str, str], + grouped_relationships: dict[str, dict[str, list[str]]], + graph: dict[str, list[str]], + descriptions_dict: dict[str, str], print_descriptions: bool = True, ) -> list[str]: output = [] @@ -490,7 +542,7 @@ def generate_output( return output - def _count_connected_components(self, graph: Dict[str, list[str]]) -> int: + def _count_connected_components(self, graph: dict[str, list[str]]) -> int: visited = set() components = 0 @@ -508,7 +560,7 @@ def dfs(node): return components def _get_central_nodes( - self, graph: Dict[str, list[str]] + self, graph: dict[str, list[str]] ) -> list[Tuple[str, float]]: degree = {node: len(neighbors) for node, neighbors in graph.items()} total_nodes = len(graph) @@ -519,15 +571,23 @@ def _get_central_nodes( @telemetry_event("CreateCollection") async def create_collection( - self, name: str, description: str = "" + self, + owner_id: UUID, + name: Optional[str] = None, + description: str = "", ) -> CollectionResponse: - return await self.providers.database.create_collection( - name, description + result = await self.providers.database.create_collection( + owner_id=owner_id, + name=name, + description=description, + ) + graph_result = await self.providers.database.graph_handler.create( + collection_id=result.id, + name=name, + description=description, ) - @telemetry_event("GetCollection") - async def get_collection(self, collection_id: UUID) -> CollectionResponse: - return await self.providers.database.get_collection(collection_id) + return result @telemetry_event("UpdateCollection") async def update_collection( @@ -537,7 +597,9 @@ async def update_collection( description: Optional[str] = None, ) -> CollectionResponse: return await self.providers.database.update_collection( - collection_id, name, description + collection_id=collection_id, + name=name, + description=description, ) @telemetry_event("DeleteCollection") @@ -549,17 +611,26 @@ async def delete_collection(self, collection_id: UUID) -> bool: return True @telemetry_event("ListCollections") - async def list_collections( - self, offset: int = 0, limit: int = 100 + async def collections_overview( + self, + offset: int, + limit: int, + user_ids: Optional[list[UUID]] = None, + document_ids: Optional[list[UUID]] = None, + collection_ids: Optional[list[UUID]] = None, ) -> dict[str, list[CollectionResponse] | int]: - return await self.providers.database.list_collections( - offset=offset, limit=limit + return await self.providers.database.get_collections_overview( + offset=offset, + limit=limit, + filter_user_ids=user_ids, + filter_document_ids=document_ids, + filter_collection_ids=collection_ids, ) @telemetry_event("AddUserToCollection") async def add_user_to_collection( self, user_id: UUID, collection_id: UUID - ) -> None: + ) -> bool: return await self.providers.database.add_user_to_collection( user_id, collection_id ) @@ -567,7 +638,7 @@ async def add_user_to_collection( @telemetry_event("RemoveUserFromCollection") async def remove_user_from_collection( self, user_id: UUID, collection_id: UUID - ) -> None: + ) -> bool: return await self.providers.database.remove_user_from_collection( user_id, collection_id ) @@ 
-575,38 +646,15 @@ async def remove_user_from_collection( @telemetry_event("GetUsersInCollection") async def get_users_in_collection( self, collection_id: UUID, offset: int = 0, limit: int = 100 - ) -> dict[str, list[UserResponse] | int]: + ) -> dict[str, list[User] | int]: return await self.providers.database.get_users_in_collection( collection_id, offset=offset, limit=limit ) - @telemetry_event("GetCollectionsForUser") - async def get_collections_for_user( - self, user_id: UUID, offset: int = 0, limit: int = 100 - ) -> dict[str, list[CollectionResponse] | int]: - return await self.providers.database.get_collections_for_user( - user_id, offset, limit - ) - - @telemetry_event("CollectionsOverview") - async def collections_overview( - self, - collection_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = 100, - *args, - **kwargs, - ): - return await self.providers.database.get_collections_overview( - collection_ids, - offset=offset, - limit=limit, - ) - @telemetry_event("GetDocumentsInCollection") async def documents_in_collection( self, collection_id: UUID, offset: int = 0, limit: int = 100 - ) -> dict[str, Union[list[DocumentInfo], int]]: + ) -> dict[str, list[DocumentResponse] | int]: return await self.providers.database.documents_in_collection( collection_id, offset=offset, limit=limit ) @@ -619,12 +667,12 @@ async def add_prompt( await self.providers.database.add_prompt( name, template, input_types ) - return {"message": f"Prompt '{name}' added successfully."} + return f"Prompt '{name}' added successfully." # type: ignore except ValueError as e: raise R2RException(status_code=400, message=str(e)) @telemetry_event("GetPrompt") - async def get_prompt( + async def get_cached_prompt( self, prompt_name: str, inputs: Optional[dict[str, Any]] = None, @@ -633,7 +681,7 @@ async def get_prompt( try: return { "message": ( - await self.providers.database.get_prompt( + await self.providers.database.get_cached_prompt( prompt_name, inputs, prompt_override ) ) @@ -641,6 +689,22 @@ async def get_prompt( except ValueError as e: raise R2RException(status_code=404, message=str(e)) + @telemetry_event("GetPrompt") + async def get_prompt( + self, + prompt_name: str, + inputs: Optional[dict[str, Any]] = None, + prompt_override: Optional[str] = None, + ) -> dict: + try: + return await self.providers.database.get_prompt( # type: ignore + prompt_name=prompt_name, + inputs=inputs, + prompt_override=prompt_override, + ) + except ValueError as e: + raise R2RException(status_code=404, message=str(e)) + @telemetry_event("GetAllPrompts") async def get_all_prompts(self) -> dict[str, Prompt]: return await self.providers.database.get_all_prompts() @@ -656,7 +720,7 @@ async def update_prompt( await self.providers.database.update_prompt( name, template, input_types ) - return {"message": f"Prompt '{name}' updated successfully."} + return f"Prompt '{name}' updated successfully." 
# type: ignore except ValueError as e: raise R2RException(status_code=404, message=str(e)) @@ -689,25 +753,25 @@ async def verify_conversation_access( @telemetry_event("CreateConversation") async def create_conversation( self, user_id: Optional[UUID] = None, auth_user=None - ) -> str: - return await self.logging_connection.create_conversation( + ) -> dict: + return await self.logging_connection.create_conversation( # type: ignore user_id=user_id ) @telemetry_event("ConversationsOverview") async def conversations_overview( self, + offset: int, + limit: int, conversation_ids: Optional[list[UUID]] = None, user_ids: Optional[UUID | list[UUID]] = None, - offset: int = 0, - limit: int = 100, auth_user=None, - ) -> dict[str, Union[list[dict], int]]: + ) -> dict[str, list[dict] | int]: return await self.logging_connection.get_conversations_overview( - conversation_ids=conversation_ids, - user_ids=user_ids, offset=offset, limit=limit, + user_ids=user_ids, + conversation_ids=conversation_ids, ) @telemetry_event("AddMessage") @@ -742,17 +806,23 @@ async def update_message_metadata( @telemetry_event("exportMessagesToCSV") async def export_messages_to_csv( self, chunk_size: int = 1000, return_type: str = "stream" - ) -> Union[StreamingResponse, str]: + ) -> StreamingResponse | str: return await self.logging_connection.export_messages_to_csv( chunk_size, return_type ) @telemetry_event("BranchesOverview") async def branches_overview( - self, conversation_id: str, auth_user=None - ) -> list[Dict]: - return await self.logging_connection.get_branches_overview( - conversation_id + self, + offset: int, + limit: int, + conversation_id: str, + auth_user=None, + ) -> list[dict]: + return await self.logging_connection.get_branches( # type: ignore + offset=offset, + limit=limit, + conversation_id=conversation_id, ) @telemetry_event("GetNextBranch") diff --git a/py/core/main/services/retrieval_service.py b/py/core/main/services/retrieval_service.py index e1c1ba009..1657d6559 100644 --- a/py/core/main/services/retrieval_service.py +++ b/py/core/main/services/retrieval_service.py @@ -8,10 +8,10 @@ from core import R2RStreamingRAGAgent from core.base import ( - DocumentInfo, + DocumentResponse, EmbeddingPurpose, GenerationConfig, - KGSearchSettings, + GraphSearchSettings, Message, R2RException, RunManager, @@ -19,7 +19,7 @@ manage_run, to_async_generator, ) -from core.base.api.models import RAGResponse, SearchResponse, UserResponse +from core.base.api.models import CombinedSearchResponse, RAGResponse, User from core.base.logger.base import RunType from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider from core.telemetry.telemetry_decorator import telemetry_event @@ -53,27 +53,18 @@ def __init__( ) @telemetry_event("Search") - async def search( + async def search( # TODO - rename to 'search_chunks' self, query: str, - vector_search_settings: SearchSettings = SearchSettings(), - kg_search_settings: KGSearchSettings = KGSearchSettings(), + search_settings: SearchSettings = SearchSettings(), *args, **kwargs, - ) -> SearchResponse: + ) -> CombinedSearchResponse: async with manage_run(self.run_manager, RunType.RETRIEVAL) as run_id: t0 = time.time() - if ( - kg_search_settings.use_kg_search - and self.config.database.kg_search_settings is False - ): - raise R2RException( - status_code=400, - message="Knowledge Graph search is not enabled in the configuration.", - ) if ( - vector_search_settings.use_vector_search + search_settings.use_semantic_search and self.config.database.provider is None ): raise 
R2RException( @@ -82,23 +73,24 @@ async def search( ) if ( - vector_search_settings.use_vector_search - and vector_search_settings.use_hybrid_search - and not vector_search_settings.hybrid_search_settings - ): + ( + search_settings.use_semantic_search + and search_settings.use_fulltext_search + ) + or search_settings.use_hybrid_search + ) and not search_settings.hybrid_settings: raise R2RException( status_code=400, message="Hybrid search settings must be specified in the input configuration.", ) # TODO - Remove these transforms once we have a better way to handle this - for filter, value in vector_search_settings.filters.items(): + for filter, value in search_settings.filters.items(): if isinstance(value, UUID): - vector_search_settings.filters[filter] = str(value) + search_settings.filters[filter] = str(value) merged_kwargs = { "input": to_async_generator([query]), "state": None, - "vector_search_settings": vector_search_settings, - "kg_search_settings": kg_search_settings, + "search_settings": search_settings, "run_manager": self.run_manager, **kwargs, } @@ -124,7 +116,7 @@ async def search_documents( query: str, settings: SearchSettings, query_embedding: Optional[list[float]] = None, - ) -> list[DocumentInfo]: + ) -> list[DocumentResponse]: return await self.providers.database.search_documents( query_text=query, @@ -147,13 +139,19 @@ async def completion( **kwargs, ) + @telemetry_event("Embedding") + async def embedding( + self, + text: str, + ): + return await self.providers.embedding.async_get_embedding(text=text) + @telemetry_event("RAG") async def rag( self, query: str, rag_generation_config: GenerationConfig, - vector_search_settings: SearchSettings = SearchSettings(), - kg_search_settings: KGSearchSettings = KGSearchSettings(), + search_settings: SearchSettings = SearchSettings(), *args, **kwargs, ) -> RAGResponse: @@ -163,16 +161,15 @@ async def rag( for ( filter, value, - ) in vector_search_settings.filters.items(): + ) in search_settings.filters.items(): if isinstance(value, UUID): - vector_search_settings.filters[filter] = str(value) + search_settings.filters[filter] = str(value) if rag_generation_config.stream: return await self.stream_rag_response( query, rag_generation_config, - vector_search_settings, - kg_search_settings, + search_settings, *args, **kwargs, ) @@ -180,8 +177,7 @@ async def rag( merged_kwargs = { "input": to_async_generator([query]), "state": None, - "vector_search_settings": vector_search_settings, - "kg_search_settings": kg_search_settings, + "search_settings": search_settings, "run_manager": self.run_manager, "rag_generation_config": rag_generation_config, **kwargs, @@ -219,8 +215,7 @@ async def stream_rag_response( self, query, rag_generation_config, - vector_search_settings, - kg_search_settings, + search_settings, *args, **kwargs, ): @@ -230,8 +225,7 @@ async def stream_response(): "input": to_async_generator([query]), "state": None, "run_manager": self.run_manager, - "vector_search_settings": vector_search_settings, - "kg_search_settings": kg_search_settings, + "search_settings": search_settings, "rag_generation_config": rag_generation_config, **kwargs, } @@ -250,8 +244,7 @@ async def stream_response(): async def agent( self, rag_generation_config: GenerationConfig, - vector_search_settings: SearchSettings = SearchSettings(), - kg_search_settings: KGSearchSettings = KGSearchSettings(), + search_settings: SearchSettings = SearchSettings(), task_prompt_override: Optional[str] = None, include_title_if_available: Optional[bool] = False, conversation_id: 
Optional[str] = None, @@ -278,9 +271,9 @@ async def agent( ) # Transform UUID filters to strings - for filter, value in vector_search_settings.filters.items(): + for filter, value in search_settings.filters.items(): if isinstance(value, UUID): - vector_search_settings.filters[filter] = str(value) + search_settings.filters[filter] = str(value) ids = None @@ -305,38 +298,39 @@ async def agent( status_code=404, message=f"Conversation not found: {conversation_id}", ) - messages = [conv[1] for conv in conversation] + [ # type: ignore + messages = [resp.message for resp in conversation] + [ # type: ignore message ] - ids = [conv[0] for conv in conversation] + ids = [resp.id for resp in conversation] else: - conversation_id = ( - await self.logging_connection.create_conversation() - ) - messages = [message] # type: ignore - else: - if not conversation_id: - conversation_id = ( + conversation = ( await self.logging_connection.create_conversation() ) + conversation_id = conversation["id"] parent_id = None - for inner_message in messages[:-1]: - - parent_id = ( - await self.logging_connection.add_message( - conversation_id, inner_message, parent_id + if conversation_id and messages: + for inner_message in messages[:-1]: + parent_id = await self.logging_connection.add_message( + conversation_id, # Use the stored conversation_id + inner_message, + parent_id, ) - ) + messages = messages or [] + + if message and not messages: + messages = [message] current_message = messages[-1] # type: ignore # Save the new message to the conversation - message_id = await self.logging_connection.add_message( + message = await self.logging_connection.add_message( conversation_id, # type: ignore current_message, # type: ignore parent_id=str(ids[-2]) if (ids and len(ids) > 1) else None, # type: ignore ) + if message is not None: + message_id = message["id"] # type: ignore if rag_generation_config.stream: t1 = time.time() @@ -359,8 +353,7 @@ async def stream_response(): async for chunk in agent.arun( messages=messages, system_instruction=task_prompt_override, - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, + search_settings=search_settings, rag_generation_config=rag_generation_config, include_title_if_available=include_title_if_available, *args, @@ -373,16 +366,15 @@ async def stream_response(): results = await self.agents.rag_agent.arun( messages=messages, system_instruction=task_prompt_override, - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, + search_settings=search_settings, rag_generation_config=rag_generation_config, include_title_if_available=include_title_if_available, *args, **kwargs, ) await self.logging_connection.add_message( - conversation_id, - Message(**results[-1]), + conversation_id=conversation_id, + content=Message(**results[-1]), parent_id=message_id, ) @@ -396,7 +388,9 @@ async def stream_response(): ) return { "messages": results, - "conversation_id": conversation_id, + "conversation_id": str( + conversation_id + ), # Ensure it's a string } except Exception as e: @@ -420,19 +414,17 @@ def _parse_user_data(user_data): user_data = json.loads(user_data) except json.JSONDecodeError: raise ValueError(f"Invalid user data format: {user_data}") - return UserResponse.from_dict(user_data) + return User.from_dict(user_data) @staticmethod def prepare_search_input( query: str, - vector_search_settings: SearchSettings, - kg_search_settings: KGSearchSettings, - user: UserResponse, + search_settings: SearchSettings, + user: User, ) -> 
dict: return { "query": query, - "vector_search_settings": vector_search_settings.to_dict(), - "kg_search_settings": kg_search_settings.to_dict(), + "search_settings": search_settings.to_dict(), "user": user.to_dict(), } @@ -440,11 +432,8 @@ def prepare_search_input( def parse_search_input(data: dict): return { "query": data["query"], - "vector_search_settings": SearchSettings.from_dict( - data["vector_search_settings"] - ), - "kg_search_settings": KGSearchSettings.from_dict( - data["kg_search_settings"] + "search_settings": SearchSettings.from_dict( + data["search_settings"] ), "user": RetrievalServiceAdapter._parse_user_data(data["user"]), } @@ -452,16 +441,14 @@ def parse_search_input(data: dict): @staticmethod def prepare_rag_input( query: str, - vector_search_settings: SearchSettings, - kg_search_settings: KGSearchSettings, + search_settings: SearchSettings, rag_generation_config: GenerationConfig, task_prompt_override: Optional[str], - user: UserResponse, + user: User, ) -> dict: return { "query": query, - "vector_search_settings": vector_search_settings.to_dict(), - "kg_search_settings": kg_search_settings.to_dict(), + "search_settings": search_settings.to_dict(), "rag_generation_config": rag_generation_config.to_dict(), "task_prompt_override": task_prompt_override, "user": user.to_dict(), @@ -471,11 +458,8 @@ def prepare_rag_input( def parse_rag_input(data: dict): return { "query": data["query"], - "vector_search_settings": SearchSettings.from_dict( - data["vector_search_settings"] - ), - "kg_search_settings": KGSearchSettings.from_dict( - data["kg_search_settings"] + "search_settings": SearchSettings.from_dict( + data["search_settings"] ), "rag_generation_config": GenerationConfig.from_dict( data["rag_generation_config"] @@ -487,19 +471,17 @@ def parse_rag_input(data: dict): @staticmethod def prepare_agent_input( message: Message, - vector_search_settings: SearchSettings, - kg_search_settings: KGSearchSettings, + search_settings: SearchSettings, rag_generation_config: GenerationConfig, task_prompt_override: Optional[str], include_title_if_available: bool, - user: UserResponse, + user: User, conversation_id: Optional[str] = None, branch_id: Optional[str] = None, ) -> dict: return { "message": message.to_dict(), - "vector_search_settings": vector_search_settings.to_dict(), - "kg_search_settings": kg_search_settings.to_dict(), + "search_settings": search_settings.to_dict(), "rag_generation_config": rag_generation_config.to_dict(), "task_prompt_override": task_prompt_override, "include_title_if_available": include_title_if_available, @@ -512,11 +494,8 @@ def prepare_agent_input( def parse_agent_input(data: dict): return { "message": Message.from_dict(data["message"]), - "vector_search_settings": SearchSettings.from_dict( - data["vector_search_settings"] - ), - "kg_search_settings": KGSearchSettings.from_dict( - data["kg_search_settings"] + "search_settings": SearchSettings.from_dict( + data["search_settings"] ), "rag_generation_config": GenerationConfig.from_dict( data["rag_generation_config"] diff --git a/py/core/parsers/media/docx_parser.py b/py/core/parsers/media/docx_parser.py index 86c242115..da78f3d4c 100644 --- a/py/core/parsers/media/docx_parser.py +++ b/py/core/parsers/media/docx_parser.py @@ -1,7 +1,6 @@ from io import BytesIO from typing import AsyncGenerator -from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, @@ -10,7 +9,7 @@ ) -class DOCXParser(AsyncParser[DataType]): 
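# A caller-side sketch of the consolidated retrieval interface above: the
# separate vector_search_settings / kg_search_settings arguments are replaced
# by a single SearchSettings object. The flag and field names
# (use_semantic_search, use_fulltext_search, use_hybrid_search,
# hybrid_settings, filters) come from this diff; the service object, query
# text, filter key and import path are illustrative assumptions.
from uuid import uuid4

from core.base import GenerationConfig, SearchSettings

async def demo_search_and_rag(service):
    settings = SearchSettings(
        use_semantic_search=True,
        use_fulltext_search=False,
        filters={"collection_id": uuid4()},  # UUID values are stringified by the service
    )
    chunks = await service.search(
        query="What does the graph contain?", search_settings=settings
    )
    rag_result = await service.rag(
        query="Summarize the key findings",
        rag_generation_config=GenerationConfig(),
        search_settings=settings,
    )
    return chunks, rag_result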
+class DOCXParser(AsyncParser[str | bytes]): """A parser for DOCX data.""" def __init__( @@ -32,7 +31,7 @@ def __init__( "Error, `python-docx` is required to run `DOCXParser`. Please install it using `pip install python-docx`." ) - async def ingest(self, data: DataType, *args, **kwargs) -> AsyncGenerator[str, None]: # type: ignore + async def ingest(self, data: str | bytes, *args, **kwargs) -> AsyncGenerator[str, None]: # type: ignore """Ingest DOCX data and yield text from each paragraph.""" if isinstance(data, str): raise ValueError("DOCX data must be in bytes format.") diff --git a/py/core/parsers/media/img_parser.py b/py/core/parsers/media/img_parser.py index 3d7793ae0..1cff49cc7 100644 --- a/py/core/parsers/media/img_parser.py +++ b/py/core/parsers/media/img_parser.py @@ -2,7 +2,7 @@ import logging from typing import AsyncGenerator -from core.base.abstractions import DataType, GenerationConfig +from core.base.abstractions import GenerationConfig from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, @@ -13,7 +13,7 @@ logger = logging.getLogger() -class ImageParser(AsyncParser[DataType]): +class ImageParser(AsyncParser[str | bytes]): """A parser for image data using vision models.""" def __init__( @@ -38,7 +38,7 @@ def __init__( ) async def ingest( # type: ignore - self, data: DataType, **kwargs + self, data: str | bytes, **kwargs ) -> AsyncGenerator[str, None]: """ Ingest image data and yield a description using vision model. @@ -51,7 +51,7 @@ async def ingest( # type: ignore Chunks of image description text """ if not self.vision_prompt_text: - self.vision_prompt_text = await self.database_provider.get_prompt( # type: ignore + self.vision_prompt_text = await self.database_provider.get_cached_prompt( # type: ignore prompt_name=self.config.vision_img_prompt_name ) try: diff --git a/py/core/parsers/media/pdf_parser.py b/py/core/parsers/media/pdf_parser.py index 976ed5168..2fa6ce0b7 100644 --- a/py/core/parsers/media/pdf_parser.py +++ b/py/core/parsers/media/pdf_parser.py @@ -11,7 +11,7 @@ import aiofiles from pdf2image import convert_from_path -from core.base.abstractions import DataType, GenerationConfig +from core.base.abstractions import GenerationConfig from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, @@ -22,7 +22,7 @@ logger = logging.getLogger() -class VLMPDFParser(AsyncParser[DataType]): +class VLMPDFParser(AsyncParser[str | bytes]): """A parser for PDF documents using vision models for page processing.""" def __init__( @@ -124,7 +124,7 @@ async def process_page( raise async def ingest( - self, data: DataType, maintain_order: bool = False, **kwargs + self, data: str | bytes, maintain_order: bool = False, **kwargs ) -> AsyncGenerator[dict[str, str], None]: """ Ingest PDF data and yield descriptions for each page using vision model. 
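# A short sketch of the prompt API split visible in this diff: parsers fetch
# rendered prompt text through get_cached_prompt, while get_prompt now returns
# the stored prompt record. The provider argument and helper names below are
# placeholders, not values taken from the diff.
async def load_vision_prompt(database_provider, prompt_name: str) -> str:
    # Rendered template text, as ImageParser and VLMPDFParser request above.
    return await database_provider.get_cached_prompt(prompt_name=prompt_name)

async def load_prompt_record(database_provider, prompt_name: str) -> dict:
    # The stored prompt record, via the new get_prompt.
    return await database_provider.get_prompt(prompt_name=prompt_name)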
@@ -138,7 +138,7 @@ async def ingest( Dict containing page number and content for each processed page """ if not self.vision_prompt_text: - self.vision_prompt_text = await self.database_provider.get_prompt( # type: ignore + self.vision_prompt_text = await self.database_provider.get_cached_prompt( # type: ignore prompt_name=self.config.vision_pdf_prompt_name ) @@ -206,7 +206,7 @@ async def ingest( os.rmdir(temp_dir) -class BasicPDFParser(AsyncParser[DataType]): +class BasicPDFParser(AsyncParser[str | bytes]): """A parser for PDF data.""" def __init__( @@ -228,7 +228,7 @@ def __init__( ) async def ingest( - self, data: DataType, **kwargs + self, data: str | bytes, **kwargs ) -> AsyncGenerator[str, None]: """Ingest PDF data and yield text from each page.""" if isinstance(data, str): @@ -265,7 +265,7 @@ async def ingest( yield page_text -class PDFParserUnstructured(AsyncParser[DataType]): +class PDFParserUnstructured(AsyncParser[str | bytes]): def __init__( self, config: IngestionConfig, @@ -291,7 +291,7 @@ def __init__( async def ingest( self, - data: DataType, + data: str | bytes, partition_strategy: str = "hi_res", chunking_strategy="by_title", ) -> AsyncGenerator[str, None]: diff --git a/py/core/parsers/media/ppt_parser.py b/py/core/parsers/media/ppt_parser.py index 6fa8f52e9..a8061c582 100644 --- a/py/core/parsers/media/ppt_parser.py +++ b/py/core/parsers/media/ppt_parser.py @@ -1,7 +1,6 @@ from io import BytesIO from typing import AsyncGenerator -from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, @@ -10,7 +9,7 @@ ) -class PPTParser(AsyncParser[DataType]): +class PPTParser(AsyncParser[str | bytes]): """A parser for PPT data.""" def __init__( @@ -31,7 +30,7 @@ def __init__( "Error, `python-pptx` is required to run `PPTParser`. Please install it using `pip install python-pptx`." 
) - async def ingest(self, data: DataType, **kwargs) -> AsyncGenerator[str, None]: # type: ignore + async def ingest(self, data: str | bytes, **kwargs) -> AsyncGenerator[str, None]: # type: ignore """Ingest PPT data and yield text from each slide.""" if isinstance(data, str): raise ValueError("PPT data must be in bytes format.") diff --git a/py/core/parsers/structured/csv_parser.py b/py/core/parsers/structured/csv_parser.py index c8418f5a1..fcadaa1a4 100644 --- a/py/core/parsers/structured/csv_parser.py +++ b/py/core/parsers/structured/csv_parser.py @@ -1,7 +1,6 @@ # type: ignore from typing import IO, AsyncGenerator, Optional, Union -from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, @@ -10,7 +9,7 @@ ) -class CSVParser(AsyncParser[DataType]): +class CSVParser(AsyncParser[str | bytes]): """A parser for CSV data.""" def __init__( @@ -40,7 +39,7 @@ async def ingest( yield ", ".join(row) -class CSVParserAdvanced(AsyncParser[DataType]): +class CSVParserAdvanced(AsyncParser[str | bytes]): """A parser for CSV data.""" def __init__( diff --git a/py/core/parsers/structured/json_parser.py b/py/core/parsers/structured/json_parser.py index 1efe29c78..4d6daa9de 100644 --- a/py/core/parsers/structured/json_parser.py +++ b/py/core/parsers/structured/json_parser.py @@ -3,7 +3,6 @@ import json from typing import AsyncGenerator -from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, @@ -12,7 +11,7 @@ ) -class JSONParser(AsyncParser[DataType]): +class JSONParser(AsyncParser[str | bytes]): """A parser for JSON data.""" def __init__( @@ -26,7 +25,7 @@ def __init__( self.config = config async def ingest( - self, data: DataType, *args, **kwargs + self, data: str | bytes, *args, **kwargs ) -> AsyncGenerator[str, None]: """ Ingest JSON data and yield a formatted text representation. 
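# A minimal sketch of a parser under the new typing convention in this diff:
# the DataType alias is removed and parsers are generic over `str | bytes`.
# The class name and its behaviour are hypothetical; only the
# AsyncParser[str | bytes] generic and the ingest signature mirror the
# surrounding parsers.
from typing import AsyncGenerator

from core.base.parsers.base_parser import AsyncParser


class LineParser(AsyncParser[str | bytes]):
    """Hypothetical parser that decodes bytes and yields text line by line."""

    async def ingest(
        self, data: str | bytes, *args, **kwargs
    ) -> AsyncGenerator[str, None]:
        if isinstance(data, bytes):
            data = data.decode("utf-8")
        for line in data.splitlines():
            yield line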
diff --git a/py/core/parsers/structured/xlsx_parser.py b/py/core/parsers/structured/xlsx_parser.py index e06a22d73..65ada74a4 100644 --- a/py/core/parsers/structured/xlsx_parser.py +++ b/py/core/parsers/structured/xlsx_parser.py @@ -2,7 +2,6 @@ from io import BytesIO from typing import AsyncGenerator -from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, @@ -11,7 +10,7 @@ ) -class XLSXParser(AsyncParser[DataType]): +class XLSXParser(AsyncParser[str | bytes]): """A parser for XLSX data.""" def __init__( @@ -45,7 +44,7 @@ async def ingest( yield ", ".join(map(str, row)) -class XLSXParserAdvanced(AsyncParser[DataType]): +class XLSXParserAdvanced(AsyncParser[str | bytes]): """A parser for XLSX data.""" # identifies connected components in the excel graph and extracts data from each component diff --git a/py/core/parsers/text/html_parser.py b/py/core/parsers/text/html_parser.py index 6f3e146c6..a04331e02 100644 --- a/py/core/parsers/text/html_parser.py +++ b/py/core/parsers/text/html_parser.py @@ -3,7 +3,6 @@ from bs4 import BeautifulSoup -from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, @@ -12,7 +11,7 @@ ) -class HTMLParser(AsyncParser[DataType]): +class HTMLParser(AsyncParser[str | bytes]): """A parser for HTML data.""" def __init__( @@ -26,7 +25,7 @@ def __init__( self.config = config async def ingest( - self, data: DataType, *args, **kwargs + self, data: str | bytes, *args, **kwargs ) -> AsyncGenerator[str, None]: """Ingest HTML data and yield text.""" soup = BeautifulSoup(data, "html.parser") diff --git a/py/core/parsers/text/md_parser.py b/py/core/parsers/text/md_parser.py index 2a181fbf9..7ab11d920 100644 --- a/py/core/parsers/text/md_parser.py +++ b/py/core/parsers/text/md_parser.py @@ -3,7 +3,6 @@ from bs4 import BeautifulSoup -from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, @@ -12,7 +11,7 @@ ) -class MDParser(AsyncParser[DataType]): +class MDParser(AsyncParser[str | bytes]): """A parser for Markdown data.""" def __init__( @@ -30,7 +29,7 @@ def __init__( self.markdown = markdown async def ingest( - self, data: DataType, *args, **kwargs + self, data: str | bytes, *args, **kwargs ) -> AsyncGenerator[str, None]: """Ingest Markdown data and yield text.""" if isinstance(data, bytes): diff --git a/py/core/parsers/text/text_parser.py b/py/core/parsers/text/text_parser.py index 791f0783c..51ff1cbd0 100644 --- a/py/core/parsers/text/text_parser.py +++ b/py/core/parsers/text/text_parser.py @@ -1,7 +1,6 @@ # type: ignore from typing import AsyncGenerator -from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser from core.base.providers import ( CompletionProvider, @@ -10,7 +9,7 @@ ) -class TextParser(AsyncParser[DataType]): +class TextParser(AsyncParser[str | bytes]): """A parser for raw text data.""" def __init__( @@ -24,8 +23,8 @@ def __init__( self.config = config async def ingest( - self, data: DataType, *args, **kwargs - ) -> AsyncGenerator[DataType, None]: + self, data: str | bytes, *args, **kwargs + ) -> AsyncGenerator[str | bytes, None]: if isinstance(data, bytes): data = data.decode("utf-8") yield data diff --git a/py/core/pipelines/rag_pipeline.py b/py/core/pipelines/rag_pipeline.py index 3fc380719..a74fc933e 100644 --- 
a/py/core/pipelines/rag_pipeline.py +++ b/py/core/pipelines/rag_pipeline.py @@ -4,7 +4,7 @@ from ..base.abstractions import ( GenerationConfig, - KGSearchSettings, + GraphSearchSettings, SearchSettings, ) from ..base.logger.base import RunType @@ -34,8 +34,7 @@ async def run( # type: ignore input: Any, state: Optional[AsyncState], run_manager: Optional[RunManager] = None, - vector_search_settings: SearchSettings = SearchSettings(), - kg_search_settings: KGSearchSettings = KGSearchSettings(), + search_settings: SearchSettings = SearchSettings(), rag_generation_config: GenerationConfig = GenerationConfig(), *args: Any, **kwargs: Any, @@ -62,8 +61,7 @@ async def multi_query_generator(input): async for query in input: input_kwargs = { **kwargs, - "vector_search_settings": vector_search_settings, - "kg_search_settings": kg_search_settings, + "search_settings": search_settings, } task = asyncio.create_task( self._search_pipeline.run( diff --git a/py/core/pipelines/search_pipeline.py b/py/core/pipelines/search_pipeline.py index ed7ede2da..bf9905f97 100644 --- a/py/core/pipelines/search_pipeline.py +++ b/py/core/pipelines/search_pipeline.py @@ -5,7 +5,7 @@ from ..base.abstractions import ( AggregateSearchResult, - KGSearchSettings, + GraphSearchSettings, SearchSettings, ) from ..base.logger.run_manager import RunManager, manage_run @@ -35,21 +35,12 @@ async def run( # type: ignore state: Optional[AsyncState], stream: bool = False, run_manager: Optional[RunManager] = None, - vector_search_settings: SearchSettings = SearchSettings(), - kg_search_settings: KGSearchSettings = KGSearchSettings(), + search_settings: SearchSettings = SearchSettings(), *args: Any, **kwargs: Any, ): request_state = state or AsyncState() - use_vector_search = ( - self._vector_search_pipeline is not None - and vector_search_settings.use_vector_search - ) - do_kg = ( - self._kg_search_pipeline is not None - and kg_search_settings.use_kg_search - ) run_manager = run_manager or self.run_manager async with manage_run(run_manager): vector_search_queue: Queue[str] = Queue() @@ -57,10 +48,8 @@ async def run( # type: ignore async def enqueue_requests(): async for message in input: - if use_vector_search: - await vector_search_queue.put(message) - if do_kg: - await kg_queue.put(message) + await vector_search_queue.put(message) + await kg_queue.put(message) await vector_search_queue.put(None) await kg_queue.put(None) @@ -69,47 +58,37 @@ async def enqueue_requests(): enqueue_task = asyncio.create_task(enqueue_requests()) # Start the embedding and KG pipelines in parallel - if use_vector_search: - if not self._vector_search_pipeline: - raise ValueError("Vector search pipeline not found") - - vector_search_task = asyncio.create_task( - self._vector_search_pipeline.run( - dequeue_requests(vector_search_queue), - request_state, - stream, - run_manager, - vector_search_settings=vector_search_settings, - *args, - **kwargs, - ) + vector_search_task = asyncio.create_task( + self._vector_search_pipeline.run( + dequeue_requests(vector_search_queue), + request_state, + stream, + run_manager, + search_settings=search_settings, + *args, + **kwargs, ) - - if do_kg: - if not self._kg_search_pipeline: - raise ValueError("KG search pipeline not found") - kg_task = asyncio.create_task( - self._kg_search_pipeline.run( - dequeue_requests(kg_queue), - request_state, - stream, - run_manager, - kg_search_settings=kg_search_settings, - *args, - **kwargs, - ) + ) + kg_task = asyncio.create_task( + self._kg_search_pipeline.run( + dequeue_requests(kg_queue), + 
request_state, + stream, + run_manager, + search_settings=search_settings, + *args, + **kwargs, ) + ) await enqueue_task - vector_search_results = ( - await vector_search_task if use_vector_search else [] - ) - kg_results = await kg_task if do_kg else [] + chunk_search_results = await vector_search_task + kg_results = await kg_task return AggregateSearchResult( - vector_search_results=vector_search_results, - kg_search_results=kg_results, + chunk_search_results=chunk_search_results, + graph_search_results=kg_results, ) def add_pipe( diff --git a/py/core/pipes/__init__.py b/py/core/pipes/__init__.py index a83373be8..5c1ab5c4b 100644 --- a/py/core/pipes/__init__.py +++ b/py/core/pipes/__init__.py @@ -7,10 +7,10 @@ from .kg.community_summary import KGCommunitySummaryPipe from .kg.deduplication import KGEntityDeduplicationPipe from .kg.deduplication_summary import KGEntityDeduplicationSummaryPipe -from .kg.entity_description import KGEntityDescriptionPipe +from .kg.description import KGEntityDescriptionPipe +from .kg.extraction import KGExtractionPipe from .kg.prompt_tuning import KGPromptTuningPipe from .kg.storage import KGStoragePipe -from .kg.triples_extraction import KGTriplesExtractionPipe from .retrieval.kg_search_pipe import KGSearchSearchPipe from .retrieval.multi_search import MultiSearchPipe from .retrieval.query_transform_pipe import QueryTransformPipe @@ -23,7 +23,7 @@ "SearchPipe", "GeneratorPipe", "EmbeddingPipe", - "KGTriplesExtractionPipe", + "KGExtractionPipe", "KGSearchSearchPipe", "KGEntityDescriptionPipe", "ParsingPipe", diff --git a/py/core/pipes/abstractions/search_pipe.py b/py/core/pipes/abstractions/search_pipe.py index 98a152db1..197957d61 100644 --- a/py/core/pipes/abstractions/search_pipe.py +++ b/py/core/pipes/abstractions/search_pipe.py @@ -3,17 +3,17 @@ from typing import Any, AsyncGenerator, Optional, Union from uuid import UUID -from core.base import AsyncPipe, AsyncState, VectorSearchResult +from core.base import AsyncPipe, AsyncState, ChunkSearchResult from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider logger = logging.getLogger() -class SearchPipe(AsyncPipe[VectorSearchResult]): +class SearchPipe(AsyncPipe[ChunkSearchResult]): class SearchConfig(AsyncPipe.PipeConfig): name: str = "default_vector_search" filters: dict = {} - search_limit: int = 10 + limit: int = 10 class Input(AsyncPipe.Input): message: Union[AsyncGenerator[str, None], str] @@ -39,7 +39,7 @@ async def search( search_settings: Any, *args: Any, **kwargs: Any, - ) -> AsyncGenerator[VectorSearchResult, None]: + ) -> AsyncGenerator[ChunkSearchResult, None]: pass @abstractmethod @@ -50,5 +50,5 @@ async def _run_logic( run_id: UUID, *args: Any, **kwargs, - ) -> AsyncGenerator[VectorSearchResult, None]: + ) -> AsyncGenerator[ChunkSearchResult, None]: pass diff --git a/py/core/pipes/ingestion/embedding_pipe.py b/py/core/pipes/ingestion/embedding_pipe.py index c95e595c7..a7782f153 100644 --- a/py/core/pipes/ingestion/embedding_pipe.py +++ b/py/core/pipes/ingestion/embedding_pipe.py @@ -4,7 +4,7 @@ from core.base import ( AsyncState, - DocumentExtraction, + DocumentChunk, EmbeddingProvider, R2RDocumentProcessingError, Vector, @@ -22,7 +22,7 @@ class EmbeddingPipe(AsyncPipe[VectorEntry]): """ class Input(AsyncPipe.Input): - message: list[DocumentExtraction] + message: list[DocumentChunk] def __init__( self, @@ -40,23 +40,21 @@ def __init__( self.embedding_provider = embedding_provider self.embedding_batch_size = embedding_batch_size - async def embed( - self, extractions: 
list[DocumentExtraction] - ) -> list[float]: + async def embed(self, extractions: list[DocumentChunk]) -> list[float]: return await self.embedding_provider.async_get_embeddings( [extraction.data for extraction in extractions], # type: ignore EmbeddingProvider.PipeStage.BASE, ) async def _process_batch( - self, extraction_batch: list[DocumentExtraction] + self, extraction_batch: list[DocumentChunk] ) -> list[VectorEntry]: vectors = await self.embed(extraction_batch) return [ VectorEntry( - extraction_id=extraction.id, + id=extraction.id, document_id=extraction.document_id, - user_id=extraction.user_id, + owner_id=extraction.owner_id, collection_ids=extraction.collection_ids, vector=Vector(data=raw_vector), text=extraction.data, # type: ignore @@ -130,7 +128,7 @@ async def process_batch(batch): self.log_queue.task_done() async def _process_extraction( - self, extraction: DocumentExtraction + self, extraction: DocumentChunk ) -> Union[VectorEntry, R2RDocumentProcessingError]: try: if isinstance(extraction.data, bytes): @@ -144,9 +142,9 @@ async def _process_extraction( ) return VectorEntry( - extraction_id=extraction.id, + id=extraction.id, document_id=extraction.document_id, - user_id=extraction.user_id, + owner_id=extraction.owner_id, collection_ids=extraction.collection_ids, vector=Vector(data=vectors[0]), text=extraction.data, diff --git a/py/core/pipes/ingestion/parsing_pipe.py b/py/core/pipes/ingestion/parsing_pipe.py index 3855b1320..df5edb813 100644 --- a/py/core/pipes/ingestion/parsing_pipe.py +++ b/py/core/pipes/ingestion/parsing_pipe.py @@ -2,12 +2,7 @@ from typing import AsyncGenerator, Optional from uuid import UUID -from core.base import ( - AsyncState, - DatabaseProvider, - Document, - DocumentExtraction, -) +from core.base import AsyncState, DatabaseProvider, Document, DocumentChunk from core.base.abstractions import R2RDocumentProcessingError from core.base.pipes.base_pipe import AsyncPipe from core.base.providers.ingestion import IngestionProvider @@ -45,7 +40,7 @@ async def _parse( run_id: UUID, version: str, ingestion_config_override: Optional[dict], - ) -> AsyncGenerator[DocumentExtraction, None]: + ) -> AsyncGenerator[DocumentChunk, None]: try: ingestion_config_override = ingestion_config_override or {} override_provider = ingestion_config_override.pop("provider", None) @@ -85,7 +80,7 @@ async def _run_logic( # type: ignore run_id: UUID, *args, **kwargs, - ) -> AsyncGenerator[DocumentExtraction, None]: + ) -> AsyncGenerator[DocumentChunk, None]: ingestion_config = kwargs.get("ingestion_config") async for result in self._parse( diff --git a/py/core/pipes/kg/clustering.py b/py/core/pipes/kg/clustering.py index 1e06be3fd..8f392fe07 100644 --- a/py/core/pipes/kg/clustering.py +++ b/py/core/pipes/kg/clustering.py @@ -1,14 +1,14 @@ import logging -from typing import Any, AsyncGenerator, Optional +from typing import Any, AsyncGenerator from uuid import UUID from core.base import ( AsyncPipe, AsyncState, CompletionProvider, - DatabaseProvider, EmbeddingProvider, ) +from core.providers.database import PostgresDBProvider from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider logger = logging.getLogger() @@ -16,12 +16,12 @@ class KGClusteringPipe(AsyncPipe): """ - Clusters entities and triples into communities within the knowledge graph using hierarchical Leiden algorithm. + Clusters entities and relationships into communities within the knowledge graph using hierarchical Leiden algorithm. 
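# A small sketch of the renamed ingestion types used by EmbeddingPipe above:
# DocumentExtraction becomes DocumentChunk, and VectorEntry takes `id` and
# `owner_id` instead of `extraction_id` and `user_id`. The helper name and the
# import path are assumptions; the field names follow this diff.
from core.base import DocumentChunk, Vector, VectorEntry

def chunk_to_vector_entry(chunk: DocumentChunk, raw_vector: list[float]) -> VectorEntry:
    return VectorEntry(
        id=chunk.id,
        document_id=chunk.document_id,
        owner_id=chunk.owner_id,
        collection_ids=chunk.collection_ids,
        vector=Vector(data=raw_vector),
        text=chunk.data,
    )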
""" def __init__( self, - database_provider: DatabaseProvider, + database_provider: PostgresDBProvider, llm_provider: CompletionProvider, embedding_provider: EmbeddingProvider, config: AsyncPipe.PipeConfig, @@ -46,18 +46,12 @@ async def cluster_kg( leiden_params: dict, ): """ - Clusters the knowledge graph triples into communities using hierarchical Leiden algorithm. Uses graspologic library. + Clusters the knowledge graph relationships into communities using hierarchical Leiden algorithm. Uses graspologic library. """ - num_communities = ( - await self.database_provider.perform_graph_clustering( - collection_id, - leiden_params, - ) - ) # type: ignore - - logger.info( - f"Clustering completed. Generated {num_communities} communities." + num_communities = await self.database_provider.graph_handler.perform_graph_clustering( + collection_id=collection_id, + leiden_params=leiden_params, ) return { @@ -73,10 +67,13 @@ async def _run_logic( # type: ignore **kwargs: Any, ) -> AsyncGenerator[dict, None]: """ - Executes the KG clustering pipe: clustering entities and triples into communities. + Executes the KG clustering pipe: clustering entities and relationships into communities. """ - collection_id = input.message["collection_id"] + collection_id = input.message.get("collection_id", None) leiden_params = input.message["leiden_params"] - yield await self.cluster_kg(collection_id, leiden_params) + yield await self.cluster_kg( + collection_id=collection_id, + leiden_params=leiden_params, + ) diff --git a/py/core/pipes/kg/community_summary.py b/py/core/pipes/kg/community_summary.py index 0afb3050d..42a448ab8 100644 --- a/py/core/pipes/kg/community_summary.py +++ b/py/core/pipes/kg/community_summary.py @@ -3,19 +3,19 @@ import logging import random import time -from typing import Any, AsyncGenerator, Optional -from uuid import UUID +from typing import Any, AsyncGenerator +from uuid import UUID, uuid4 from core.base import ( AsyncPipe, AsyncState, - CommunityReport, + Community, CompletionProvider, - DatabaseProvider, EmbeddingProvider, GenerationConfig, ) -from core.base.abstractions import Entity, Triple +from core.base.abstractions import Entity, Relationship +from core.providers.database import PostgresDBProvider from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider logger = logging.getLogger() @@ -23,12 +23,12 @@ class KGCommunitySummaryPipe(AsyncPipe): """ - Clusters entities and triples into communities within the knowledge graph using hierarchical Leiden algorithm. + Clusters entities and relationships into communities within the knowledge graph using hierarchical Leiden algorithm. 
""" def __init__( self, - database_provider: DatabaseProvider, + database_provider: PostgresDBProvider, llm_provider: CompletionProvider, embedding_provider: EmbeddingProvider, config: AsyncPipe.PipeConfig, @@ -51,28 +51,30 @@ def __init__( async def community_summary_prompt( self, entities: list[Entity], - triples: list[Triple], + relationships: list[Relationship], max_summary_input_length: int, ): entity_map: dict[str, dict[str, list[Any]]] = {} for entity in entities: if not entity.name in entity_map: - entity_map[entity.name] = {"entities": [], "triples": []} - entity_map[entity.name]["entities"].append(entity) + entity_map[entity.name] = {"entities": [], "relationships": []} # type: ignore + entity_map[entity.name]["entities"].append(entity) # type: ignore - for triple in triples: - if not triple.subject in entity_map: - entity_map[triple.subject] = { + for relationship in relationships: + if not relationship.subject in entity_map: + entity_map[relationship.subject] = { # type: ignore "entities": [], - "triples": [], + "relationships": [], } - entity_map[triple.subject]["triples"].append(triple) + entity_map[relationship.subject]["relationships"].append( # type: ignore + relationship + ) - # sort in descending order of triple count + # sort in descending order of relationship count sorted_entity_map = sorted( entity_map.items(), - key=lambda x: len(x[1]["triples"]), + key=lambda x: len(x[1]["relationships"]), reverse=True, ) @@ -90,15 +92,17 @@ async def _get_entity_descriptions_string( for entity in sampled_entities ) - async def _get_triples_string(triples: list, max_count: int = 100): - sampled_triples = ( - random.sample(triples, max_count) - if len(triples) > max_count - else triples + async def _get_relationships_string( + relationships: list, max_count: int = 100 + ): + sampled_relationships = ( + random.sample(relationships, max_count) + if len(relationships) > max_count + else relationships ) return "\n".join( - f"{triple.id},{triple.subject},{triple.object},{triple.predicate},{triple.description}" - for triple in sampled_triples + f"{relationship.id},{relationship.subject},{relationship.object},{relationship.predicate},{relationship.description}" + for relationship in sampled_relationships ) prompt = "" @@ -106,14 +110,16 @@ async def _get_triples_string(triples: list, max_count: int = 100): entity_descriptions = await _get_entity_descriptions_string( entity_data["entities"] ) - triples = await _get_triples_string(entity_data["triples"]) + relationships = await _get_relationships_string( + entity_data["relationships"] + ) prompt += f""" Entity: {entity_name} Descriptions: {entity_descriptions} - Triples: - {triples} + Relationships: + {relationships} """ if len(prompt) > max_summary_input_length: @@ -128,25 +134,28 @@ async def _get_triples_string(triples: list, max_count: int = 100): async def process_community( self, - community_number: int, + community_id: UUID, max_summary_input_length: int, generation_config: GenerationConfig, - collection_id: UUID, + collection_id: UUID | None, + nodes: list[str], + all_entities: list[Entity], + all_relationships: list[Relationship], ) -> dict: """ Process a community by summarizing it and creating a summary embedding and storing it to a database. 
""" - community_level, entities, triples = ( - await self.database_provider.get_community_details( - community_number=community_number, - collection_id=collection_id, - ) - ) + entities = [entity for entity in all_entities if entity.name in nodes] + relationships = [ + relationship + for relationship in all_relationships + if relationship.subject in nodes and relationship.object in nodes + ] - if entities == [] and triples == []: + if not entities and not relationships: raise ValueError( - f"Community {community_number} has no entities or triples." + f"Community {community_id} has no entities or relationships." ) for attempt in range(3): @@ -155,12 +164,12 @@ async def process_community( ( await self.llm_provider.aget_completion( messages=await self.database_provider.prompt_handler.get_message_payload( - task_prompt_name=self.database_provider.config.kg_enrichment_settings.community_reports_prompt, + task_prompt_name=self.database_provider.config.graph_enrichment_settings.graphrag_communities, task_inputs={ "input_text": ( await self.community_summary_prompt( entities, - triples, + relationships, max_summary_input_length, ) ), @@ -180,7 +189,7 @@ async def process_community( ) else: raise ValueError( - f"Failed to generate a summary for community {community_number} at level {community_level}." + f"Failed to generate a summary for community {community_id}" ) description_dict = json.loads(description) @@ -193,23 +202,22 @@ async def process_community( except Exception as e: if attempt == 2: logger.error( - f"KGCommunitySummaryPipe: Error generating community summary for community {community_number}: {e}" + f"KGCommunitySummaryPipe: Error generating community summary for community {community_id}: {e}" ) return { - "community_number": community_number, + "community_id": community_id, "error": str(e), } - community_report = CommunityReport( - community_number=community_number, + community = Community( + community_id=community_id, collection_id=collection_id, - level=community_level, name=name, summary=summary, rating=rating, rating_explanation=rating_explanation, findings=findings, - embedding=await self.embedding_provider.async_get_embedding( + description_embedding=await self.embedding_provider.async_get_embedding( "Summary:\n" + summary + "\n\nFindings:\n" @@ -217,11 +225,11 @@ async def process_community( ), ) - await self.database_provider.add_community_report(community_report) + await self.database_provider.graph_handler.add_community(community) return { - "community_number": community_report.community_number, - "name": community_report.name, + "community_id": community.community_id, + "name": community.name, } async def _run_logic( # type: ignore @@ -242,7 +250,7 @@ async def _run_logic( # type: ignore limit = input.message["limit"] generation_config = input.message["generation_config"] max_summary_input_length = input.message["max_summary_input_length"] - collection_id = input.message["collection_id"] + collection_id = input.message.get("collection_id", None) community_summary_jobs = [] logger = input.message.get("logger", logging.getLogger()) @@ -250,26 +258,57 @@ async def _run_logic( # type: ignore logger.info( f"KGCommunitySummaryPipe: Checking if community summaries exist for communities {offset} to {offset + limit}" ) - community_numbers_exist = ( - await self.database_provider.check_community_reports_exist( - collection_id=collection_id, offset=offset, limit=limit + + all_entities, _ = ( + await self.database_provider.graph_handler.get_entities( + parent_id=collection_id, + 
offset=0, + limit=-1, + include_embeddings=False, ) ) - logger.info( - f"KGCommunitySummaryPipe: Community summaries exist for communities {len(community_numbers_exist)}" + all_relationships, _ = ( + await self.database_provider.graph_handler.get_relationships( + parent_id=collection_id, + offset=0, + limit=-1, + include_embeddings=False, + ) ) - for community_number in range(offset, offset + limit): - if community_number not in community_numbers_exist: - community_summary_jobs.append( - self.process_community( - community_number=community_number, - max_summary_input_length=max_summary_input_length, - generation_config=generation_config, - collection_id=collection_id, - ) + # Perform clustering + leiden_params = input.message.get("leiden_params", {}) + _, community_clusters = ( + await self.database_provider.graph_handler._cluster_and_add_community_info( + relationships=all_relationships, + relationship_ids_cache={}, + leiden_params=leiden_params, + collection_id=collection_id, + ) + ) + + # Organize clusters + clusters: dict[Any] = {} + for item in community_clusters: + cluster_id = item.cluster + if cluster_id not in clusters: + clusters[cluster_id] = [] + clusters[cluster_id].append(item.node) + + # Now, process the clusters + for _, nodes in clusters.items(): + community_summary_jobs.append( + self.process_community( + community_id=uuid4(), + nodes=nodes, + all_entities=all_entities, + all_relationships=all_relationships, + max_summary_input_length=max_summary_input_length, + generation_config=generation_config, + collection_id=collection_id, ) + ) total_jobs = len(community_summary_jobs) total_errors = 0 @@ -285,7 +324,7 @@ async def _run_logic( # type: ignore if "error" in summary: logger.error( - f"KGCommunitySummaryPipe: Error generating community summary for community {summary['community_number']}: {summary['error']}" + f"KGCommunitySummaryPipe: Error generating community summary for community {summary['community_id']}: {summary['error']}" ) total_errors += 1 continue diff --git a/py/core/pipes/kg/deduplication.py b/py/core/pipes/kg/deduplication.py index 441167610..dd0382833 100644 --- a/py/core/pipes/kg/deduplication.py +++ b/py/core/pipes/kg/deduplication.py @@ -1,10 +1,8 @@ import json import logging -from typing import Any, Union +from typing import Any from uuid import UUID -from fastapi import HTTPException - from core.base import AsyncState from core.base.abstractions import Entity, KGEntityDeduplicationType from core.base.pipes import AsyncPipe @@ -26,14 +24,12 @@ def __init__( self, config: AsyncPipe.PipeConfig, database_provider: PostgresDBProvider, - llm_provider: Union[ - OpenAICompletionProvider, LiteLLMCompletionProvider - ], - embedding_provider: Union[ - LiteLLMEmbeddingProvider, - OpenAIEmbeddingProvider, - OllamaEmbeddingProvider, - ], + llm_provider: OpenAICompletionProvider | LiteLLMCompletionProvider, + embedding_provider: ( + LiteLLMEmbeddingProvider + | OpenAIEmbeddingProvider + | OllamaEmbeddingProvider + ), logging_provider: SqlitePersistentLoggingProvider, **kwargs, ): @@ -46,130 +42,127 @@ def __init__( self.llm_provider = llm_provider self.embedding_provider = embedding_provider - async def kg_named_entity_deduplication( - self, collection_id: UUID, **kwargs + async def _get_entities( + self, graph_id: UUID | None, collection_id: UUID | None ): - try: - entity_count = await self.database_provider.get_entity_count( - collection_id=collection_id, distinct=True - ) - - logger.info( - f"KGEntityDeduplicationPipe: Getting entities for collection 
{collection_id}" + if collection_id is not None: + return await self.database_provider.graph_handler.get_entities( + collection_id=collection_id, offset=0, limit=-1 ) - logger.info( - f"KGEntityDeduplicationPipe: Entity count: {entity_count}" + elif graph_id is not None: + # TODO: remove the tuple return type + return ( + await self.database_provider.graph_handler.entities.get( + id=graph_id, + offset=0, + limit=-1, + ) + )[0] + else: + raise ValueError( + "Either graph_id or collection_id must be provided" ) - entities = ( - await self.database_provider.get_entities( - collection_id=collection_id, offset=0, limit=-1 - ) - )["entities"] + async def kg_named_entity_deduplication( + self, graph_id: UUID | None, collection_id: UUID | None, **kwargs + ): - logger.info( - f"KGEntityDeduplicationPipe: Got {len(entities)} entities for collection {collection_id}" - ) + import numpy as np - # deduplicate entities by name - deduplicated_entities: dict[str, dict[str, list[str]]] = {} - deduplication_source_keys = [ - "extraction_ids", - "document_id", - "attributes", - ] - deduplication_target_keys = [ - "extraction_ids", - "document_ids", - "attributes", - ] - deduplication_keys = list( - zip(deduplication_source_keys, deduplication_target_keys) - ) - for entity in entities: - if entity.name not in deduplicated_entities: - deduplicated_entities[entity.name] = { - target_key: [] for _, target_key in deduplication_keys - } - for source_key, target_key in deduplication_keys: - value = getattr(entity, source_key) - if isinstance(value, list): - deduplicated_entities[entity.name][target_key].extend( - value - ) - else: - deduplicated_entities[entity.name][target_key].append( - value - ) - - logger.info( - f"KGEntityDeduplicationPipe: Deduplicated {len(deduplicated_entities)} entities" - ) + entities = await self._get_entities(graph_id, collection_id) - # upsert deduplcated entities in the collection_entity table - deduplicated_entities_list = [ - Entity( - name=name, - collection_id=collection_id, - extraction_ids=entity["extraction_ids"], - document_ids=entity["document_ids"], - attributes={}, - ) - for name, entity in deduplicated_entities.items() - ] + logger.info( + f"KGEntityDeduplicationPipe: Got {len(entities)} entities for {graph_id or collection_id}" + ) - logger.info( - f"KGEntityDeduplicationPipe: Upserting {len(deduplicated_entities_list)} deduplicated entities for collection {collection_id}" - ) - await self.database_provider.add_entities( - deduplicated_entities_list, - table_name="collection_entity", - conflict_columns=["name", "collection_id", "attributes"], - ) + # deduplicate entities by name + deduplicated_entities: dict[str, dict[str, list[str]]] = {} + deduplication_source_keys = [ + "description", + "chunk_ids", + "document_id", + # "description_embedding", + ] + deduplication_target_keys = [ + "description", + "chunk_ids", + "document_ids", + # "description_embedding", + ] + deduplication_keys = list( + zip(deduplication_source_keys, deduplication_target_keys) + ) + for entity in entities: + if entity.name not in deduplicated_entities: + deduplicated_entities[entity.name] = { + target_key: [] for _, target_key in deduplication_keys + } + # deduplicated_entities[entity.name]['total_entries'] = 0 + # deduplicated_entities[entity.name]['description_embedding'] = np.zeros(len(json.loads(entity.description_embedding))) + + for source_key, target_key in deduplication_keys: + value = getattr(entity, source_key) + + # if source_key == "description_embedding": + # 
deduplicated_entities[entity.name]['total_entries'] += 1 + # deduplicated_entities[entity.name][target_key] += np.array(json.loads(value)) + + if isinstance(value, list): + deduplicated_entities[entity.name][target_key].extend( + value + ) + else: + deduplicated_entities[entity.name][target_key].append( + value + ) - yield { - "result": f"successfully deduplicated {len(entities)} entities to {len(deduplicated_entities)} entities for collection {collection_id}", - "num_entities": len(deduplicated_entities), - } - except Exception as e: - logger.error( - f"KGEntityDeduplicationPipe: Error in entity deduplication: {str(e)}" - ) - raise HTTPException( - status_code=500, - detail=f"KGEntityDeduplicationPipe: Error deduplicating entities: {str(e)}", + # upsert deduplcated entities in the collection_entity table + deduplicated_entities_list = [ + Entity( + name=name, + # description="\n".join(entity["description"]), + # description_embedding=json.dumps((entity["description_embedding"] / entity['total_entries']).tolist()), + collection_id=collection_id, + graph_id=graph_id, + chunk_ids=list(set(entity["chunk_ids"])), + document_ids=list(set(entity["document_ids"])), + attributes={}, ) + for name, entity in deduplicated_entities.items() + ] + + logger.info( + f"KGEntityDeduplicationPipe: Upserting {len(deduplicated_entities_list)} deduplicated entities for collection {graph_id}" + ) + + await self.database_provider.graph_handler.add_entities( + deduplicated_entities_list, + table_name="collection_entity", + ) + + yield { + "result": f"successfully deduplicated {len(entities)} entities to {len(deduplicated_entities)} entities for collection {graph_id}", + "num_entities": len(deduplicated_entities), + } async def kg_description_entity_deduplication( - self, collection_id: UUID, **kwargs + self, graph_id: UUID | None, collection_id: UUID | None, **kwargs ): from sklearn.cluster import DBSCAN - entities = ( - await self.database_provider.get_entities( - collection_id=collection_id, - offset=0, - limit=-1, - extra_columns=["description_embedding"], - ) - )["entities"] - + entities = await self._get_entities(graph_id, collection_id) for entity in entities: entity.description_embedding = json.loads( entity.description_embedding ) - logger.info( - f"KGEntityDeduplicationPipe: Got {len(entities)} entities for collection {collection_id}" - ) - deduplication_source_keys = [ - "extraction_ids", + "chunk_ids", "document_id", "attributes", ] deduplication_target_keys = [ - "extraction_ids", + "chunk_ids", "document_ids", "attributes", ] @@ -220,23 +213,24 @@ async def kg_description_entity_deduplication( description = "\n".join(descriptions[:5]) # Collect all extraction IDs from entities in the cluster - extraction_ids = set() + chunk_ids = set() document_ids = set() for entity in entities: - if entity.extraction_ids: - extraction_ids.update(entity.extraction_ids) + if entity.chunk_ids: + chunk_ids.update(entity.chunk_ids) if entity.document_id: document_ids.add(entity.document_id) - extraction_ids_list = list(extraction_ids) + chunk_ids_list = list(chunk_ids) document_ids_list = list(document_ids) deduplicated_entities_list.append( Entity( name=longest_name, description=description, + graph_id=graph_id, collection_id=collection_id, - extraction_ids=extraction_ids_list, + chunk_ids=chunk_ids_list, document_ids=document_ids_list, attributes={ "aliases": list(aliases), @@ -245,24 +239,26 @@ async def kg_description_entity_deduplication( ) logger.info( - f"KGEntityDeduplicationPipe: Upserting 
{len(deduplicated_entities_list)} deduplicated entities for collection {collection_id}" + f"KGEntityDeduplicationPipe: Upserting {len(deduplicated_entities_list)} deduplicated entities for collection {graph_id}" ) - await self.database_provider.add_entities( + await self.database_provider.graph_handler.add_entities( deduplicated_entities_list, table_name="collection_entity", - conflict_columns=["name", "collection_id", "attributes"], + conflict_columns=["name", "graph_id", "attributes"], ) yield { - "result": f"successfully deduplicated {len(entities)} entities to {len(deduplicated_entities)} entities for collection {collection_id}", + "result": f"successfully deduplicated {len(entities)} entities to {len(deduplicated_entities)} entities for collection {graph_id}", "num_entities": len(deduplicated_entities), } - async def kg_llm_entity_deduplication(self, collection_id: UUID, **kwargs): - # TODO: implement LLM based entity deduplication - raise NotImplementedError( - "LLM entity deduplication is not implemented yet" - ) + # async def kg_llm_entity_deduplication( + # self, graph_id: UUID, collection_id: UUID, **kwargs + # ): + # # TODO: implement LLM based entity deduplication + # raise NotImplementedError( + # "LLM entity deduplication is not implemented yet" + # ) async def _run_logic( self, @@ -274,43 +270,44 @@ async def _run_logic( ): # TODO: figure out why the return type AsyncGenerator[dict, None] is not working - collection_id = input.message["collection_id"] + graph_id = input.message.get("graph_id", None) + collection_id = input.message.get("collection_id", None) + + if graph_id and collection_id: + raise ValueError( + "graph_id and collection_id cannot both be provided" + ) - kg_entity_deduplication_type = input.message[ - "kg_entity_deduplication_type" + graph_entity_deduplication_type = input.message[ + "graph_entity_deduplication_type" ] - if kg_entity_deduplication_type == KGEntityDeduplicationType.BY_NAME: - logger.info( - f"KGEntityDeduplicationPipe: Running named entity deduplication for collection {collection_id}" - ) + if ( + graph_entity_deduplication_type + == KGEntityDeduplicationType.BY_NAME + ): async for result in self.kg_named_entity_deduplication( - collection_id, **kwargs + graph_id=graph_id, collection_id=collection_id, **kwargs ): yield result elif ( - kg_entity_deduplication_type + graph_entity_deduplication_type == KGEntityDeduplicationType.BY_DESCRIPTION ): - logger.info( - f"KGEntityDeduplicationPipe: Running description entity deduplication for collection {collection_id}" - ) - async for result in self.kg_description_entity_deduplication( # type: ignore - collection_id, **kwargs + async for result in self.kg_description_entity_deduplication( + graph_id=graph_id, collection_id=collection_id, **kwargs ): yield result - elif kg_entity_deduplication_type == KGEntityDeduplicationType.BY_LLM: - logger.info( - f"KGEntityDeduplicationPipe: Running LLM entity deduplication for collection {collection_id}" + elif ( + graph_entity_deduplication_type == KGEntityDeduplicationType.BY_LLM + ): + raise NotImplementedError( + "LLM entity deduplication is not implemented yet" ) - async for result in self.kg_llm_entity_deduplication( # type: ignore - collection_id, **kwargs - ): - yield result else: raise ValueError( - f"Invalid kg_entity_deduplication_type: {kg_entity_deduplication_type}" + f"Invalid graph_entity_deduplication_type: {graph_entity_deduplication_type}" ) diff --git a/py/core/pipes/kg/deduplication_summary.py b/py/core/pipes/kg/deduplication_summary.py index 
d494a1bb2..847c2f351 100644 --- a/py/core/pipes/kg/deduplication_summary.py +++ b/py/core/pipes/kg/deduplication_summary.py @@ -58,14 +58,14 @@ async def _merge_entity_descriptions_llm_prompt( description_length = 0 while index < len(entity_descriptions) and not ( len(entity_descriptions[index]) + description_length - > self.database_provider.config.kg_entity_deduplication_settings.max_description_input_length + > self.database_provider.config.graph_entity_deduplication_settings.max_description_input_length ): description_length += len(entity_descriptions[index]) index += 1 completion = await self.llm_provider.aget_completion( messages=await self.database_provider.prompt_handler.get_message_payload( - task_prompt_name=self.database_provider.config.kg_entity_deduplication_settings.kg_entity_deduplication_prompt, + task_prompt_name=self.database_provider.config.graph_entity_deduplication_settings.graph_entity_deduplication_prompt, task_inputs={ "entity_name": entity_name, "entity_descriptions": "\n".join( @@ -105,7 +105,7 @@ async def _merge_entity_descriptions( ) async def _prepare_and_upsert_entities( - self, entities_batch: list[Entity], collection_id: UUID + self, entities_batch: list[Entity], graph_id: UUID ) -> Any: embeddings = await self.embedding_provider.async_get_embeddings( @@ -114,21 +114,52 @@ async def _prepare_and_upsert_entities( for i, entity in enumerate(entities_batch): entity.description_embedding = str(embeddings[i]) # type: ignore - entity.collection_id = collection_id + entity.graph_id = graph_id logger.info( - f"Upserting {len(entities_batch)} entities for collection {collection_id}" + f"Upserting {len(entities_batch)} entities for graph {graph_id}" ) - await self.database_provider.update_entity_descriptions(entities_batch) + await self.database_provider.graph_handler.update_entity_descriptions( + entities_batch + ) logger.info( - f"Upserted {len(entities_batch)} entities for collection {collection_id}" + f"Upserted {len(entities_batch)} entities for graph {graph_id}" ) for entity in entities_batch: yield entity + async def _get_entities( + self, + graph_id: Optional[UUID], + collection_id: Optional[UUID], + offset: int, + limit: int, + level, + ): + + if graph_id is not None: + return await self.database_provider.graph_handler.entities.get( + parent_id=graph_id, + offset=offset, + limit=limit, + level=level, + ) + + elif collection_id is not None: + return await self.database_provider.graph_handler.get_entities( + parent_id=collection_id, + offset=offset, + limit=limit, + ) + + else: + raise ValueError( + "Either graph_id or collection_id must be provided" + ) + async def _run_logic( self, input: AsyncPipe.Input, @@ -139,37 +170,37 @@ async def _run_logic( ): # TODO: figure out why the return type AsyncGenerator[dict, None] is not working - collection_id = input.message["collection_id"] + graph_id = input.message.get("graph_id", None) + collection_id = input.message.get("collection_id", None) offset = input.message["offset"] limit = input.message["limit"] - kg_entity_deduplication_type = input.message[ - "kg_entity_deduplication_type" + graph_entity_deduplication_type = input.message[ + "graph_entity_deduplication_type" ] - kg_entity_deduplication_prompt = input.message[ - "kg_entity_deduplication_prompt" + graph_entity_deduplication_prompt = input.message[ + "graph_entity_deduplication_prompt" ] generation_config = input.message["generation_config"] logger.info( - f"Running kg_entity_deduplication_summary for collection {collection_id} with settings 
kg_entity_deduplication_type: {kg_entity_deduplication_type}, kg_entity_deduplication_prompt: {kg_entity_deduplication_prompt}, generation_config: {generation_config}" + f"Running kg_entity_deduplication_summary for graph {graph_id} with settings graph_entity_deduplication_type: {graph_entity_deduplication_type}, graph_entity_deduplication_prompt: {graph_entity_deduplication_prompt}, generation_config: {generation_config}" ) - entities = ( - await self.database_provider.get_entities( - collection_id, - entity_table_name="collection_entity", - offset=offset, - limit=limit, - ) - )["entities"] + entities = await self._get_entities( + graph_id, + collection_id, + offset, + limit, # type: ignore + ) entity_names = [entity.name for entity in entities] entity_descriptions = ( - await self.database_provider.get_entities( - collection_id, + await self.database_provider.graph_handler.get_entities( + parent_id=collection_id, entity_names=entity_names, - entity_table_name="document_entity", + offset=offset, + limit=limit, ) )["entities"] @@ -182,7 +213,7 @@ async def _run_logic( ) logger.info( - f"Retrieved {len(entity_descriptions)} entity descriptions for collection {collection_id}" + f"Retrieved {len(entity_descriptions)} entity descriptions for graph {graph_id}" ) tasks = [] @@ -201,7 +232,7 @@ async def _run_logic( # prepare and upsert entities async for result in self._prepare_and_upsert_entities( - entities_batch, collection_id + entities_batch, graph_id ): yield result @@ -215,6 +246,6 @@ async def _run_logic( # prepare and upsert entities async for result in self._prepare_and_upsert_entities( - entities_batch, collection_id + entities_batch, graph_id ): yield result diff --git a/py/core/pipes/kg/entity_description.py b/py/core/pipes/kg/description.py similarity index 60% rename from py/core/pipes/kg/entity_description.py rename to py/core/pipes/kg/description.py index f480625a6..bec740f45 100644 --- a/py/core/pipes/kg/entity_description.py +++ b/py/core/pipes/kg/description.py @@ -1,4 +1,4 @@ -# pipe to extract nodes/triples etc +# pipe to extract nodes/relationships etc import asyncio import logging @@ -15,6 +15,7 @@ ) from core.base.abstractions import Entity from core.base.pipes.base_pipe import AsyncPipe +from core.providers.database import PostgresDBProvider from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider logger = logging.getLogger() @@ -30,7 +31,7 @@ class Input(AsyncPipe.Input): def __init__( self, - database_provider: DatabaseProvider, + database_provider: PostgresDBProvider, llm_provider: CompletionProvider, embedding_provider: EmbeddingProvider, config: AsyncPipe.PipeConfig, @@ -73,76 +74,66 @@ def truncate_info(info_list, max_length): return truncated_info async def process_entity( - entities, triples, max_description_input_length, document_id + entities, relationships, max_description_input_length, document_id ): entity_info = [ f"{entity.name}, {entity.description}" for entity in entities ] - triples_txt = [ - f"{i+1}: {triple.subject}, {triple.object}, {triple.predicate} - Summary: {triple.description}" - for i, triple in enumerate(triples) + relationships_txt = [ + f"{i+1}: {relationship.subject}, {relationship.object}, {relationship.predicate} - Summary: {relationship.description}" + for i, relationship in enumerate(relationships) ] # potentially slow at scale, but set to avoid duplicates - unique_extraction_ids = set() + unique_chunk_ids = set() for entity in entities: - for extraction_id in entity.extraction_ids: - 
unique_extraction_ids.add(extraction_id) - - out_entity = Entity( - name=entities[0].name, - extraction_ids=list(unique_extraction_ids), - document_ids=[document_id], - ) - - out_entity.description = ( - ( - await self.llm_provider.aget_completion( - messages=await self.database_provider.prompt_handler.get_message_payload( - task_prompt_name=self.database_provider.config.kg_creation_settings.kg_entity_description_prompt, - task_inputs={ - "entity_info": truncate_info( - entity_info, - max_description_input_length, - ), - "triples_txt": truncate_info( - triples_txt, - max_description_input_length, - ), - }, - ), - generation_config=self.database_provider.config.kg_creation_settings.generation_config, - ) - ) - .choices[0] - .message.content - ) + for chunk_id in entity.chunk_ids: + unique_chunk_ids.add(chunk_id) + out_entity = entities[0] if not out_entity.description: - logger.error(f"No description for entity {out_entity.name}") - return out_entity.name - - out_entity.description_embedding = ( - await self.embedding_provider.async_get_embeddings( - [out_entity.description] + out_entity.description = ( + ( + await self.llm_provider.aget_completion( + messages=await self.database_provider.prompt_handler.get_message_payload( + task_prompt_name=self.database_provider.config.graph_creation_settings.graph_entity_description_prompt, + task_inputs={ + "entity_info": truncate_info( + entity_info, + max_description_input_length, + ), + "relationships_txt": truncate_info( + relationships_txt, + max_description_input_length, + ), + }, + ), + generation_config=self.database_provider.config.graph_creation_settings.generation_config, + ) + ) + .choices[0] + .message.content ) - )[0] - # upsert the entity and its embedding - await self.database_provider.upsert_embeddings( - [ - ( - out_entity.name, - out_entity.description, - str(out_entity.description_embedding), - out_entity.extraction_ids, - document_id, + if not out_entity.description: + logger.error( + f"No description for entity {out_entity.name}" + ) + return out_entity.name + + out_entity.description_embedding = ( + await self.embedding_provider.async_get_embeddings( + [out_entity.description] ) - ], - "document_entity", - ) + )[0] + + # upsert the entity and its embedding + await self.database_provider.graph_handler.add_entities( + [out_entity], + table_name="documents_entities", + ) return out_entity.name @@ -155,7 +146,7 @@ async def process_entity( f"KGEntityDescriptionPipe: Getting entity map for document {document_id}", ) - entity_map = await self.database_provider.get_entity_map( + entity_map = await self.database_provider.graph_handler.get_entity_map( offset, limit, document_id ) total_entities = len(entity_map) @@ -171,7 +162,7 @@ async def process_entity( workflows.append( process_entity( entity_info["entities"], - entity_info["triples"], + entity_info["relationships"], input.message["max_description_input_length"], document_id, ) diff --git a/py/core/pipes/kg/triples_extraction.py b/py/core/pipes/kg/extraction.py similarity index 72% rename from py/core/pipes/kg/triples_extraction.py rename to py/core/pipes/kg/extraction.py index e0643e169..548601d8d 100644 --- a/py/core/pipes/kg/triples_extraction.py +++ b/py/core/pipes/kg/extraction.py @@ -8,16 +8,16 @@ from core.base import ( AsyncState, CompletionProvider, - DatabaseProvider, - DocumentExtraction, + DocumentChunk, Entity, GenerationConfig, KGExtraction, R2RDocumentProcessingError, R2RException, - Triple, + Relationship, ) from core.base.pipes.base_pipe import AsyncPipe +from 
core.providers.database import PostgresDBProvider from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider logger = logging.getLogger() @@ -32,7 +32,7 @@ class ClientError(Exception): pass -class KGTriplesExtractionPipe(AsyncPipe[dict]): +class KGExtractionPipe(AsyncPipe[dict]): """ Extracts knowledge graph information from document extractions. """ @@ -43,7 +43,7 @@ class Input(AsyncPipe.Input): def __init__( self, - database_provider: DatabaseProvider, + database_provider: PostgresDBProvider, llm_provider: CompletionProvider, config: AsyncPipe.PipeConfig, logging_provider: SqlitePersistentLoggingProvider, @@ -56,7 +56,9 @@ def __init__( super().__init__( logging_provider=logging_provider, config=config - or AsyncPipe.PipeConfig(name="default_kg_triples_extraction_pipe"), + or AsyncPipe.PipeConfig( + name="default_kg_relationships_extraction_pipe" + ), ) self.database_provider = database_provider self.llm_provider = llm_provider @@ -67,9 +69,9 @@ def __init__( async def extract_kg( self, - extractions: list[DocumentExtraction], + extractions: list[DocumentChunk], generation_config: GenerationConfig, - max_knowledge_triples: int, + max_knowledge_relationships: int, entity_types: list[str], relation_types: list[str], retries: int = 5, @@ -78,17 +80,17 @@ async def extract_kg( total_tasks: Optional[int] = None, ) -> KGExtraction: """ - Extracts NER triples from a extraction with retries. + Extracts NER relationships from an extraction with retries. """ # combine all extractions into a single string combined_extraction: str = " ".join([extraction.data for extraction in extractions]) # type: ignore messages = await self.database_provider.prompt_handler.get_message_payload( - task_prompt_name=self.database_provider.config.kg_creation_settings.kg_triples_extraction_prompt, + task_prompt_name=self.database_provider.config.graph_creation_settings.graphrag_relationships_extraction_few_shot, task_inputs={ "input": combined_extraction, - "max_knowledge_triples": max_knowledge_triples, + "max_knowledge_relationships": max_knowledge_relationships, "entity_types": "\n".join(entity_types), "relation_types": "\n".join(relation_types), }, @@ -141,8 +143,8 @@ def parse_fn(response_str: str) -> Any: category=entity_category, description=entity_description, name=entity_value, - document_id=extractions[0].document_id, - extraction_ids=[ + parent_id=extractions[0].document_id, + chunk_ids=[ extraction.id for extraction in extractions ], attributes={}, @@ -159,14 +161,14 @@ def parse_fn(response_str: str) -> Any: # check if subject and object are in entities_dict relations_arr.append( - Triple( + Relationship( subject=subject, predicate=predicate, object=object, description=description, weight=weight, - document_id=extractions[0].document_id, - extraction_ids=[ + parent_id=extractions[0].document_id, + chunk_ids=[ extraction.id for extraction in extractions ], attributes={}, @@ -175,14 +177,10 @@ def parse_fn(response_str: str) -> Any: return entities_arr, relations_arr - entities, triples = parse_fn(kg_extraction) + entities, relationships = parse_fn(kg_extraction) return KGExtraction( - extraction_ids=[ - extraction.id for extraction in extractions - ], - document_id=extractions[0].document_id, entities=entities, - triples=triples, + relationships=relationships, ) except ( @@ -196,20 +194,18 @@ def parse_fn(response_str: str) -> Any: await asyncio.sleep(delay) else: logger.error( - f"Failed after retries with for extraction {extractions[0].id} of document {extractions[0].document_id}: {e}" + 
f"Failed after retries for chunk {extractions[0].id} of document {extractions[0].document_id}: {e}" ) # raise e # you should raise an error. - # add metadata to entities and triples + # add metadata to entities and relationships logger.info( f"KGExtractionPipe: Completed task number {task_id} of {total_tasks} for document {extractions[0].document_id}", ) return KGExtraction( - extraction_ids=[extraction.id for extraction in extractions], - document_id=extractions[0].document_id, entities=[], - triples=[], + relationships=[], ) async def _run_logic( # type: ignore @@ -225,8 +221,10 @@ async def _run_logic( # type: ignore document_id = input.message["document_id"] generation_config = input.message["generation_config"] - extraction_merge_count = input.message["extraction_merge_count"] - max_knowledge_triples = input.message["max_knowledge_triples"] + chunk_merge_count = input.message["chunk_merge_count"] + max_knowledge_relationships = input.message[ + "max_knowledge_relationships" + ] entity_types = input.message["entity_types"] relation_types = input.message["relation_types"] @@ -237,27 +235,24 @@ async def _run_logic( # type: ignore logger = input.message.get("logger", logging.getLogger()) logger.info( - f"KGTriplesExtractionPipe: Processing document {document_id} for KG extraction", - ) - - # First get the chunks response - chunks_response = await self.database_provider.get_document_chunks( - document_id=document_id + f"KGExtractionPipe: Processing document {document_id} for KG extraction", ) # Then create the extractions from the results extractions = [ - DocumentExtraction( - id=extraction["extraction_id"], + DocumentChunk( + id=extraction["id"], document_id=extraction["document_id"], - user_id=extraction["user_id"], + owner_id=extraction["owner_id"], collection_ids=extraction["collection_ids"], data=extraction["text"], metadata=extraction["metadata"], ) for extraction in ( - await self.database_provider.get_document_chunks( - document_id=document_id + await self.database_provider.list_document_chunks( # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. 
+ document_id=document_id, + offset=0, + limit=100, ) )["results"] ] @@ -267,16 +262,16 @@ async def _run_logic( # type: ignore ) if filter_out_existing_chunks: - existing_extraction_ids = await self.database_provider.get_existing_entity_extraction_ids( + existing_chunk_ids = await self.database_provider.graph_handler.get_existing_document_entity_chunk_ids( document_id=document_id ) extractions = [ extraction for extraction in extractions - if extraction.id not in existing_extraction_ids + if extraction.id not in existing_chunk_ids ] logger.info( - f"Filtered out {len(existing_extraction_ids)} existing extractions, remaining {len(extractions)} extractions for document {document_id}" + f"Filtered out {len(existing_chunk_ids)} existing extractions, remaining {len(extractions)} extractions for document {document_id}" ) if len(extractions) == 0: @@ -284,7 +279,7 @@ async def _run_logic( # type: ignore return logger.info( - f"KGTriplesExtractionPipe: Obtained {len(extractions)} extractions to process, time from start: {time.time() - start_time:.2f} seconds", + f"KGExtractionPipe: Obtained {len(extractions)} extractions to process, time from start: {time.time() - start_time:.2f} seconds", ) # sort the extractions according to chunk_order field in metadata in ascending order extractions = sorted( extractions, key=lambda x: x.metadata.get("chunk_order", float("inf")), ) - # group these extractions into groups of extraction_merge_count + # group these extractions into groups of chunk_merge_count extractions_groups = [ - extractions[i : i + extraction_merge_count] - for i in range(0, len(extractions), extraction_merge_count) + extractions[i : i + chunk_merge_count] + for i in range(0, len(extractions), chunk_merge_count) ] logger.info( - f"KGTriplesExtractionPipe: Extracting KG Triples for document and created {len(extractions_groups)} tasks, time from start: {time.time() - start_time:.2f} seconds", + f"KGExtractionPipe: Extracting KG Relationships for document and created {len(extractions_groups)} tasks, time from start: {time.time() - start_time:.2f} seconds", ) tasks = [ @@ -308,7 +303,7 @@ async def _run_logic( # type: ignore self.extract_kg( extractions=extractions_group, generation_config=generation_config, - max_knowledge_triples=max_knowledge_triples, + max_knowledge_relationships=max_knowledge_relationships, entity_types=entity_types, relation_types=relation_types, task_id=task_id, @@ -322,7 +317,7 @@ async def _run_logic( # type: ignore total_tasks = len(tasks) logger.info( - f"KGTriplesExtractionPipe: Waiting for {total_tasks} KG extraction tasks to complete", + f"KGExtractionPipe: Waiting for {total_tasks} KG extraction tasks to complete", ) for completed_task in asyncio.as_completed(tasks): @@ -331,15 +326,15 @@ async def _run_logic( # type: ignore completed_tasks += 1 if completed_tasks % 100 == 0: logger.info( - f"KGTriplesExtractionPipe: Completed {completed_tasks}/{total_tasks} KG extraction tasks", + f"KGExtractionPipe: Completed {completed_tasks}/{total_tasks} KG extraction tasks", ) except Exception as e: - logger.error(f"Error in Extracting KG Triples: {e}") + logger.error(f"Error in Extracting KG Relationships: {e}") yield R2RDocumentProcessingError( document_id=document_id, error_message=str(e), ) logger.info( - f"KGTriplesExtractionPipe: Completed {completed_tasks}/{total_tasks} KG extraction tasks, time from 
start: {time.time() - start_time:.2f} seconds", ) diff --git a/py/core/pipes/kg/prompt_tuning.py b/py/core/pipes/kg/prompt_tuning.py index 7a1274d5b..0f997d20d 100644 --- a/py/core/pipes/kg/prompt_tuning.py +++ b/py/core/pipes/kg/prompt_tuning.py @@ -69,12 +69,12 @@ async def _run_logic( messages=await self.database_provider.prompt_handler.get_message_payload( task_prompt_name="prompt_tuning_task", task_inputs={ - "prompt_template": current_prompt.template, - "input_types": str(current_prompt.input_types), + "prompt_template": current_prompt["template"], + "input_types": str(current_prompt["input_types"]), "sample_data": chunks, }, ), - generation_config=self.database_provider.config.kg_creation_settings.generation_config, + generation_config=self.database_provider.config.graph_creation_settings.generation_config, ) if not tuned_prompt: diff --git a/py/core/pipes/kg/storage.py b/py/core/pipes/kg/storage.py index 1599268ec..069d314a8 100644 --- a/py/core/pipes/kg/storage.py +++ b/py/core/pipes/kg/storage.py @@ -1,16 +1,11 @@ import asyncio import logging -from typing import Any, AsyncGenerator, List, Optional +from typing import Any, AsyncGenerator from uuid import UUID -from core.base import ( - AsyncState, - DatabaseProvider, - EmbeddingProvider, - KGExtraction, - R2RDocumentProcessingError, -) +from core.base import AsyncState, KGExtraction, R2RDocumentProcessingError from core.base.pipes.base_pipe import AsyncPipe +from core.providers.database.postgres import PostgresDBProvider from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider logger = logging.getLogger() @@ -19,11 +14,11 @@ class KGStoragePipe(AsyncPipe): # TODO - Apply correct type hints to storage messages class Input(AsyncPipe.Input): - message: AsyncGenerator[List[Any], None] + message: AsyncGenerator[list[Any], None] def __init__( self, - database_provider: DatabaseProvider, + database_provider: PostgresDBProvider, config: AsyncPipe.PipeConfig, logging_provider: SqlitePersistentLoggingProvider, storage_batch_size: int = 1, @@ -49,17 +44,48 @@ def __init__( async def store( self, kg_extractions: list[KGExtraction], - ) -> None: + ): """ Stores a batch of knowledge graph extractions in the graph database. 
""" - try: - await self.database_provider.add_kg_extractions(kg_extractions) - return - except Exception as e: - error_message = f"Failed to store knowledge graph extractions in the database: {e}" - logger.error(error_message) - raise ValueError(error_message) + + total_entities, total_relationships = 0, 0 + + for extraction in kg_extractions: + + total_entities, total_relationships = ( + total_entities + len(extraction.entities), + total_relationships + len(extraction.relationships), + ) + + if extraction.entities: + if not extraction.entities[0].chunk_ids: + for i in range(len(extraction.entities)): + extraction.entities[i].chunk_ids = extraction.chunk_ids + extraction.entities[i].parent_id = ( + extraction.document_id + ) + + for entity in extraction.entities: + await self.database_provider.graph_handler.entities.create( + **entity.to_dict() + ) + + if extraction.relationships: + if not extraction.relationships[0].chunk_ids: + for i in range(len(extraction.relationships)): + extraction.relationships[i].chunk_ids = ( + extraction.chunk_ids + ) + extraction.relationships[i].document_id = ( + extraction.document_id + ) + + await self.database_provider.graph_handler.relationships.create( + extraction.relationships, + ) + + return (total_entities, total_relationships) async def _run_logic( # type: ignore self, @@ -68,7 +94,7 @@ async def _run_logic( # type: ignore run_id: UUID, *args: Any, **kwargs: Any, - ) -> AsyncGenerator[List[R2RDocumentProcessingError], None]: + ) -> AsyncGenerator[list[R2RDocumentProcessingError], None]: """ Executes the async knowledge graph storage pipe: storing knowledge graph extractions in the graph database. """ diff --git a/py/core/pipes/retrieval/kg_search_pipe.py b/py/core/pipes/retrieval/kg_search_pipe.py index ec7fe15b6..fdba8c78c 100644 --- a/py/core/pipes/retrieval/kg_search_pipe.py +++ b/py/core/pipes/retrieval/kg_search_pipe.py @@ -1,6 +1,6 @@ import json import logging -from typing import Any, AsyncGenerator, Optional +from typing import Any, AsyncGenerator from uuid import UUID from core.base import ( @@ -10,12 +10,13 @@ EmbeddingProvider, ) from core.base.abstractions import ( + GraphSearchResult, + GraphSearchSettings, KGCommunityResult, KGEntityResult, - KGSearchMethod, - KGSearchResult, + KGRelationshipResult, KGSearchResultType, - KGSearchSettings, + SearchSettings, ) from core.providers.logger.r2r_logger import SqlitePersistentLoggingProvider @@ -91,17 +92,17 @@ def filter_responses(self, map_responses): ) return responses - async def local_search( + async def search( self, input: GeneratorPipe.Input, state: AsyncState, run_id: UUID, - kg_search_settings: KGSearchSettings, + search_settings: SearchSettings, *args: Any, **kwargs: Any, - ) -> AsyncGenerator[KGSearchResult, None]: - # search over communities and - # do 3 searches. 
One over entities, one over relationships, one over communities + ) -> AsyncGenerator[GraphSearchResult, None]: + if search_settings.graph_settings.enabled == False: + return async for message in input.message: query_embedding = ( @@ -109,83 +110,129 @@ async def local_search( ) # entity search - search_type = "__Entity__" - async for search_result in await self.database_provider.vector_query( # type: ignore + search_type = "entities" + base_limit = search_settings.limit + + if search_type not in search_settings.graph_settings.limits: + logger.warning( + f"No limit set for graph search type {search_type}, defaulting to global settings limit of {base_limit}" + ) + async for search_result in self.database_provider.graph_handler.graph_search( # type: ignore message, search_type=search_type, - search_type_limits=kg_search_settings.local_search_limits[ - search_type - ], + limit=search_settings.graph_settings.limits.get( + search_type, base_limit + ), query_embedding=query_embedding, property_names=[ "name", "description", - "extraction_ids", + "chunk_ids", ], - filters=kg_search_settings.filters, - entities_level=kg_search_settings.entities_level, + filters=search_settings.filters, ): - yield KGSearchResult( + yield GraphSearchResult( content=KGEntityResult( name=search_result["name"], description=search_result["description"], ), - method=KGSearchMethod.LOCAL, result_type=KGSearchResultType.ENTITY, - extraction_ids=search_result["extraction_ids"], - metadata={"associated_query": message}, + score=( + search_result["similarity_score"] + if search_settings.include_scores + else None + ), + # chunk_ids=search_result["chunk_ids"], + metadata=( + { + "associated_query": message, + **(search_result["metadata"] or {}), + } + if search_settings.include_metadatas + else None + ), ) - # relationship search - # disabled for now. We will check evaluations and see if we need it - # search_type = "__Relationship__" - # async for search_result in self.database_provider.vector_query( # type: ignore - # input, - # search_type=search_type, - # search_type_limits=kg_search_settings.local_search_limits[ - # search_type - # ], - # query_embedding=query_embedding, - # property_names=[ - # "name", - # "description", - # "extraction_ids", - # "document_ids", - # ], - # ): - # yield KGSearchResult( - # content=KGRelationshipResult( - # name=search_result["name"], - # description=search_result["description"], - # ), - # method=KGSearchMethod.LOCAL, - # result_type=KGSearchResultType.RELATIONSHIP, - # # extraction_ids=search_result["extraction_ids"], - # # document_ids=search_result["document_ids"], - # metadata={"associated_query": message}, - # ) + # # relationship search + # # disabled for now. 
We will check evaluations and see if we need it + search_type = "relationships" + if search_type not in search_settings.graph_settings.limits: + logger.warning( + f"No limit set for graph search type {search_type}, defaulting to global settings limit of {base_limit}" + ) + async for search_result in self.database_provider.graph_handler.graph_search( # type: ignore + input, + search_type=search_type, + limit=search_settings.graph_settings.limits.get( + search_type, base_limit + ), + query_embedding=query_embedding, + property_names=[ + # "name", + "subject", + "predicate", + "object", + # "name", + "description", + # "chunk_ids", + # "document_ids", + ], + ): + try: + # TODO - remove this nasty hack + search_result["metadata"] = json.loads( + search_result["metadata"] + ) + except: + pass + + yield GraphSearchResult( + content=KGRelationshipResult( + # name=search_result["name"], + subject=search_result["subject"], + predicate=search_result["predicate"], + object=search_result["object"], + description=search_result["description"], + ), + result_type=KGSearchResultType.RELATIONSHIP, + score=( + search_result["similarity_score"] + if search_settings.include_scores + else None + ), + # chunk_ids=search_result["chunk_ids"], + # document_ids=search_result["document_ids"], + metadata=( + { + "associated_query": message, + **(search_result["metadata"] or {}), + } + if search_settings.include_metadatas + else None + ), + ) # community search - search_type = "__Community__" - async for search_result in await self.database_provider.vector_query( # type: ignore + search_type = "communities" + async for search_result in self.database_provider.graph_handler.graph_search( # type: ignore message, search_type=search_type, - search_type_limits=kg_search_settings.local_search_limits[ - search_type - ], - embedding_type="embedding", + limit=search_settings.graph_settings.limits.get( + search_type, base_limit + ), + # embedding_type="embedding", query_embedding=query_embedding, property_names=[ - "community_number", + "community_id", "name", "findings", "rating", "rating_explanation", "summary", ], - filters=kg_search_settings.filters, + filters=search_settings.filters, ): - yield KGSearchResult( + yield GraphSearchResult( content=KGCommunityResult( name=search_result["name"], summary=search_result["summary"], @@ -193,11 +240,20 @@ async def local_search( rating_explanation=search_result["rating_explanation"], findings=search_result["findings"], ), - method=KGSearchMethod.LOCAL, result_type=KGSearchResultType.COMMUNITY, - metadata={ - "associated_query": message, - }, + metadata=( + { + "associated_query": message, + **(search_result["metadata"] or {}), + } + if search_settings.include_metadatas + else None + ), + score=( + search_result["similarity_score"] + if search_settings.include_scores + else None + ), ) async def _run_logic( # type: ignore @@ -205,17 +261,10 @@ async def _run_logic( # type: ignore input: GeneratorPipe.Input, state: AsyncState, run_id: UUID, - kg_search_settings: KGSearchSettings, + search_settings: GraphSearchSettings, *args: Any, **kwargs: Any, - ) -> AsyncGenerator[KGSearchResult, None]: - kg_search_type = kg_search_settings.kg_search_type + ) -> AsyncGenerator[GraphSearchResult, None]: - if kg_search_type == "local": - logger.info("Performing KG local search") - async for result in self.local_search( - input, state, run_id, kg_search_settings - ): - yield result - else: - raise ValueError(f"Unsupported KG search type: {kg_search_type}") + async for result in self.search(input, 
state, run_id, search_settings): + yield result diff --git a/py/core/pipes/retrieval/multi_search.py b/py/core/pipes/retrieval/multi_search.py index 131f71f9b..068751039 100644 --- a/py/core/pipes/retrieval/multi_search.py +++ b/py/core/pipes/retrieval/multi_search.py @@ -1,11 +1,11 @@ from copy import copy, deepcopy -from typing import Any, AsyncGenerator, Dict, List, Optional +from typing import Any, AsyncGenerator, Optional from uuid import UUID from core.base.abstractions import ( + ChunkSearchResult, GenerationConfig, SearchSettings, - VectorSearchResult, ) from core.base.pipes.base_pipe import AsyncPipe @@ -51,11 +51,11 @@ async def _run_logic( # type: ignore input: Any, state: Any, run_id: UUID, - vector_search_settings: SearchSettings, + search_settings: SearchSettings, query_transform_generation_config: Optional[GenerationConfig] = None, *args: Any, **kwargs: Any, - ) -> AsyncGenerator[VectorSearchResult, None]: + ) -> AsyncGenerator[ChunkSearchResult, None]: query_transform_generation_config = ( query_transform_generation_config or copy(kwargs.get("rag_generation_config", None)) @@ -73,22 +73,21 @@ async def _run_logic( # type: ignore ) if self.config.use_rrf: - vector_search_settings.search_limit = ( - self.config.expansion_factor - * vector_search_settings.search_limit + search_settings.limit = ( + self.config.expansion_factor * search_settings.limit ) results = [] async for search_result in await self.vector_search_pipe.run( self.vector_search_pipe.Input(message=query_generator), state, - vector_search_settings=vector_search_settings, + search_settings=search_settings, *args, **kwargs, ): results.append(search_result) # Collection results by their associated queries - grouped_results: dict[str, list[VectorSearchResult]] = {} + grouped_results: dict[str, list[ChunkSearchResult]] = {} for result in results: query = result.metadata["associated_query"] if query not in grouped_results: @@ -96,27 +95,27 @@ async def _run_logic( # type: ignore grouped_results[query].append(result) fused_results = self.reciprocal_rank_fusion(grouped_results) - for result in fused_results[: vector_search_settings.search_limit]: + for result in fused_results[: search_settings.limit]: yield result else: async for search_result in await self.vector_search_pipe.run( self.vector_search_pipe.Input(message=query_generator), state, - vector_search_settings=vector_search_settings, + search_settings=search_settings, *args, **kwargs, ): yield search_result def reciprocal_rank_fusion( - self, all_results: Dict[str, List[VectorSearchResult]] - ) -> List[VectorSearchResult]: + self, all_results: dict[str, list[ChunkSearchResult]] + ) -> list[ChunkSearchResult]: document_scores: dict[UUID, float] = {} - document_results: dict[UUID, VectorSearchResult] = {} + document_results: dict[UUID, ChunkSearchResult] = {} document_queries: dict[UUID, set[str]] = {} for query, results in all_results.items(): for rank, result in enumerate(results, 1): - doc_id = result.extraction_id + doc_id = result.chunk_id if doc_id not in document_scores: document_scores[doc_id] = 0 document_results[doc_id] = result @@ -130,7 +129,7 @@ def reciprocal_rank_fusion( document_scores.items(), key=lambda x: x[1], reverse=True ) - # Reconstruct VectorSearchResults with new ranking, RRF score, and associated queries + # Reconstruct ChunkSearchResults with new ranking, RRF score, and associated queries fused_results = [] for doc_id, rrf_score in sorted_docs: result = deepcopy(document_results[doc_id]) diff --git 
a/py/core/pipes/retrieval/routing_search_pipe.py b/py/core/pipes/retrieval/routing_search_pipe.py index ca631809a..1c675b54d 100644 --- a/py/core/pipes/retrieval/routing_search_pipe.py +++ b/py/core/pipes/retrieval/routing_search_pipe.py @@ -1,13 +1,13 @@ -from typing import Any, AsyncGenerator, Dict +from typing import Any, AsyncGenerator from uuid import UUID -from core.base import AsyncPipe, AsyncState, SearchSettings, VectorSearchResult +from core.base import AsyncPipe, AsyncState, ChunkSearchResult, SearchSettings class RoutingSearchPipe(AsyncPipe): def __init__( self, - search_pipes: Dict[str, AsyncPipe], + search_pipes: dict[str, AsyncPipe], default_strategy: str, config: AsyncPipe.PipeConfig, *args, @@ -22,23 +22,21 @@ async def _run_logic( # type: ignore input: AsyncPipe.Input, state: AsyncState, run_id: UUID, - vector_search_settings: SearchSettings, + search_settings: SearchSettings, *args: Any, **kwargs: Any, - ) -> AsyncGenerator[VectorSearchResult, None]: - search_pipe = self.search_pipes.get( - vector_search_settings.search_strategy - ) + ) -> AsyncGenerator[ChunkSearchResult, None]: + search_pipe = self.search_pipes.get(search_settings.search_strategy) if not search_pipe: raise ValueError( - f"Search strategy {vector_search_settings.search_strategy} not found" + f"Search strategy {search_settings.search_strategy} not found" ) async for result in search_pipe._run_logic( # type: ignore input, state, run_id, - vector_search_settings=vector_search_settings, + search_settings=search_settings, *args, **kwargs, ): diff --git a/py/core/pipes/retrieval/search_rag_pipe.py b/py/core/pipes/retrieval/search_rag_pipe.py index a7cc4635d..f9d922755 100644 --- a/py/core/pipes/retrieval/search_rag_pipe.py +++ b/py/core/pipes/retrieval/search_rag_pipe.py @@ -91,19 +91,19 @@ async def _collect_context( total_results: int, ) -> Tuple[str, int]: context = f"Query:\n{query}\n\n" - if results.vector_search_results: + if results.chunk_search_results: context += f"Vector Search Results({iteration}):\n" it = total_results + 1 - for result in results.vector_search_results: + for result in results.chunk_search_results: context += f"[{it}]: {result.text}\n\n" it += 1 total_results = ( it - 1 ) # Update total_results based on the last index used - if results.kg_search_results: + if results.graph_search_results: context += f"Knowledge Graph ({iteration}):\n" it = total_results + 1 - for search_results in results.kg_search_results: # [1]: + for search_results in results.graph_search_results: # [1]: if associated_query := search_results.metadata.get( "associated_query" ): diff --git a/py/core/pipes/retrieval/streaming_rag_pipe.py b/py/core/pipes/retrieval/streaming_rag_pipe.py index 8871519db..b8b01202d 100644 --- a/py/core/pipes/retrieval/streaming_rag_pipe.py +++ b/py/core/pipes/retrieval/streaming_rag_pipe.py @@ -19,7 +19,7 @@ class StreamingSearchRAGPipe(GeneratorPipe): - VECTOR_SEARCH_STREAM_MARKER = ( + CHUNK_SEARCH_STREAM_MARKER = ( "search" # TODO - change this to vector_search in next major release ) KG_LOCAL_SEARCH_STREAM_MARKER = "kg_local_search" diff --git a/py/core/pipes/retrieval/vector_search_pipe.py b/py/core/pipes/retrieval/vector_search_pipe.py index 292f91143..4a3696f1e 100644 --- a/py/core/pipes/retrieval/vector_search_pipe.py +++ b/py/core/pipes/retrieval/vector_search_pipe.py @@ -6,11 +6,11 @@ from core.base import ( AsyncPipe, AsyncState, + ChunkSearchResult, DatabaseProvider, EmbeddingProvider, EmbeddingPurpose, SearchSettings, - VectorSearchResult, ) from ..abstractions.search_pipe 
import SearchPipe @@ -47,35 +47,49 @@ async def search( # type: ignore search_settings: SearchSettings, *args: Any, **kwargs: Any, - ) -> AsyncGenerator[VectorSearchResult, None]: + ) -> AsyncGenerator[ChunkSearchResult, None]: + if search_settings.chunk_settings.enabled == False: + return + search_settings.filters = ( search_settings.filters or self.config.filters ) - search_settings.search_limit = ( - search_settings.search_limit or self.config.search_limit - ) + search_settings.limit = search_settings.limit or self.config.limit results = [] query_vector = await self.embedding_provider.async_get_embedding( message, purpose=EmbeddingPurpose.QUERY, ) - search_results = await ( - self.database_provider.hybrid_search( + if ( + search_settings.use_fulltext_search + and search_settings.use_semantic_search + ) or search_settings.use_hybrid_search: + + search_results = await self.database_provider.hybrid_search( query_vector=query_vector, query_text=message, search_settings=search_settings, ) - if search_settings.use_hybrid_search - else self.database_provider.semantic_search( + elif search_settings.use_fulltext_search: + search_results = await self.database_provider.full_text_search( + query_text=message, + search_settings=search_settings, + ) + elif search_settings.use_semantic_search: + search_results = await self.database_provider.semantic_search( query_vector=query_vector, search_settings=search_settings, ) - ) + else: + raise ValueError( + "At least one of use_fulltext_search or use_semantic_search must be True" + ) + reranked_results = await self.embedding_provider.arerank( query=message, results=search_results, - limit=search_settings.search_limit, + limit=search_settings.limit, ) if kwargs.get("include_title_if_available", False): for result in reranked_results: @@ -93,10 +107,10 @@ async def _run_logic( # type: ignore input: AsyncPipe.Input, state: AsyncState, run_id: UUID, - vector_search_settings: SearchSettings = SearchSettings(), + search_settings: SearchSettings = SearchSettings(), *args: Any, **kwargs: Any, - ) -> AsyncGenerator[VectorSearchResult, None]: + ) -> AsyncGenerator[ChunkSearchResult, None]: async for search_request in input.message: await self.enqueue_log( run_id=run_id, key="search_query", value=search_request @@ -105,7 +119,7 @@ async def _run_logic( # type: ignore search_results = [] async for result in self.search( search_request, - vector_search_settings, + search_settings, *args, **kwargs, ): diff --git a/py/core/providers/auth/r2r_auth.py b/py/core/providers/auth/r2r_auth.py index c9e93c4b8..89099108c 100644 --- a/py/core/providers/auth/r2r_auth.py +++ b/py/core/providers/auth/r2r_auth.py @@ -9,6 +9,7 @@ from core.base import ( AuthConfig, AuthProvider, + CollectionResponse, CryptoProvider, DatabaseProvider, EmailProvider, @@ -16,7 +17,7 @@ Token, TokenData, ) -from core.base.api.models import UserResponse +from core.base.api.models import User DEFAULT_ACCESS_LIFETIME_IN_MINUTES = 3600 DEFAULT_REFRESH_LIFETIME_IN_DAYS = 7 @@ -55,7 +56,9 @@ def __init__( async def initialize(self): try: user = await self.register( - email=self.admin_email, password=self.admin_password + email=self.admin_email, + password=self.admin_password, + is_superuser=True, ) await self.database_provider.mark_user_as_superuser(user.id) except R2RException: @@ -113,7 +116,7 @@ async def decode_token(self, token: str) -> TokenData: except jwt.InvalidTokenError as e: raise R2RException(status_code=401, message="Invalid token") from e - async def user(self, token: str = Depends(oauth2_scheme)) 
-> UserResponse: + async def user(self, token: str = Depends(oauth2_scheme)) -> User: token_data = await self.decode_token(token) if not token_data.email: raise R2RException( @@ -127,23 +130,32 @@ async def user(self, token: str = Depends(oauth2_scheme)) -> UserResponse: return user def get_current_active_user( - self, current_user: UserResponse = Depends(user) - ) -> UserResponse: + self, current_user: User = Depends(user) + ) -> User: if not current_user.is_active: raise R2RException(status_code=400, message="Inactive user") return current_user - async def register(self, email: str, password: str) -> UserResponse: + async def register( + self, email: str, password: str, is_superuser: bool = False + ) -> User: # Create new user and give them a default collection - new_user = await self.database_provider.create_user(email, password) - default_collection = ( - await self.database_provider.create_default_collection( - new_user.id, + new_user = await self.database_provider.create_user( + email, password, is_superuser + ) + default_collection: CollectionResponse = ( + await self.database_provider.create_collection( + owner_id=new_user.id, ) ) + await self.database_provider.graph_handler.create( + collection_id=default_collection.id, + name=default_collection.name, + description=default_collection.description, + ) await self.database_provider.add_user_to_collection( - new_user.id, default_collection.collection_id + new_user.id, default_collection.id ) if self.config.require_email_verification: @@ -235,7 +247,7 @@ async def login(self, email: str, password: str) -> dict[str, Token]: status_code=401, message="Incorrect email or password" ) - if not user.is_verified: + if not user.is_verified and self.config.require_email_verification: logger.warning(f"Unverified user attempted login: {email}") raise R2RException(status_code=401, message="Email not verified") @@ -272,7 +284,7 @@ async def refresh_access_token( } async def change_password( - self, user: UserResponse, current_password: str, new_password: str + self, user: User, current_password: str, new_password: str ) -> dict[str, str]: if not isinstance(user.hashed_password, str): logger.error( diff --git a/py/core/providers/auth/supabase.py b/py/core/providers/auth/supabase.py index b87edbf9c..473614094 100644 --- a/py/core/providers/auth/supabase.py +++ b/py/core/providers/auth/supabase.py @@ -15,7 +15,7 @@ Token, TokenData, ) -from core.base.api.models import UserResponse +from core.base.api.models import User logger = logging.getLogger() @@ -69,16 +69,15 @@ async def decode_token(self, token: str) -> TokenData: "decode_token is not used with Supabase authentication" ) - async def register(self, email: str, password: str) -> UserResponse: # type: ignore + async def register(self, email: str, password: str) -> User: # type: ignore # Use Supabase client to create a new user - user = self.supabase.auth.sign_up(email=email, password=password) - if user: + if user := self.supabase.auth.sign_up(email=email, password=password): raise R2RException( status_code=400, message="Supabase provider implementation is still under construction", ) - # return UserResponse( + # return User( # id=user.id, # email=user.email, # is_active=True, @@ -96,9 +95,9 @@ async def verify_email( self, email: str, verification_code: str ) -> dict[str, str]: # Use Supabase client to verify email - response = self.supabase.auth.verify_email(email, verification_code) - - if response: + if response := self.supabase.auth.verify_email( + email, verification_code + ): return 
{"message": "Email verified successfully"} else: raise R2RException( @@ -107,8 +106,9 @@ async def verify_email( async def login(self, email: str, password: str) -> dict[str, Token]: # Use Supabase client to authenticate user and get tokens - response = self.supabase.auth.sign_in(email=email, password=password) - if response: + if response := self.supabase.auth.sign_in( + email=email, password=password + ): access_token = response.access_token refresh_token = response.refresh_token return { @@ -126,9 +126,7 @@ async def refresh_access_token( self, refresh_token: str ) -> dict[str, Token]: # Use Supabase client to refresh access token - response = self.supabase.auth.refresh_access_token(refresh_token) - - if response: + if response := self.supabase.auth.refresh_access_token(refresh_token): new_access_token = response.access_token new_refresh_token = response.refresh_token return { @@ -144,11 +142,10 @@ async def refresh_access_token( status_code=401, message="Invalid refresh token" ) - async def user(self, token: str = Depends(oauth2_scheme)) -> UserResponse: + async def user(self, token: str = Depends(oauth2_scheme)) -> User: # Use Supabase client to get user details from token - user = self.supabase.auth.get_user(token).user - if user: - return UserResponse( + if user := self.supabase.auth.get_user(token).user: + return User( id=user.id, email=user.email, is_active=True, # Assuming active if exists in Supabase @@ -164,22 +161,20 @@ async def user(self, token: str = Depends(oauth2_scheme)) -> UserResponse: raise R2RException(status_code=401, message="Invalid token") def get_current_active_user( - self, current_user: UserResponse = Depends(user) - ) -> UserResponse: + self, current_user: User = Depends(user) + ) -> User: # Check if user is active if not current_user.is_active: raise R2RException(status_code=400, message="Inactive user") return current_user async def change_password( - self, user: UserResponse, current_password: str, new_password: str + self, user: User, current_password: str, new_password: str ) -> dict[str, str]: # Use Supabase client to update user password - response = self.supabase.auth.update( + if response := self.supabase.auth.update( user.id, {"password": new_password} - ) - - if response: + ): return {"message": "Password changed successfully"} else: raise R2RException( @@ -188,9 +183,7 @@ async def change_password( async def request_password_reset(self, email: str) -> dict[str, str]: # Use Supabase client to send password reset email - response = self.supabase.auth.send_password_reset_email(email) - - if response: + if response := self.supabase.auth.send_password_reset_email(email): return { "message": "If the email exists, a reset link has been sent" } @@ -203,11 +196,9 @@ async def confirm_password_reset( self, reset_token: str, new_password: str ) -> dict[str, str]: # Use Supabase client to reset password with token - response = self.supabase.auth.reset_password_for_email( + if response := self.supabase.auth.reset_password_for_email( reset_token, new_password - ) - - if response: + ): return {"message": "Password reset successfully"} else: raise R2RException( diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index 71e530047..92f8edd15 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -1,7 +1,7 @@ import asyncio import logging from contextlib import asynccontextmanager -from typing import Any, Optional, Sequence, Union +from typing import Optional import asyncpg @@ -134,11 +134,14 @@ 
async def execute_many(self, query, params=None, batch_size=1000): async with self.pool.get_connection() as conn: async with conn.transaction(): if params: + results = [] for i in range(0, len(params), batch_size): param_batch = params[i : i + batch_size] - await conn.executemany(query, param_batch) + result = await conn.executemany(query, param_batch) + results.append(result) + return results else: - await conn.executemany(query) + return await conn.executemany(query) async def fetch_query(self, query, params=None): if not self.pool: @@ -160,3 +163,25 @@ async def fetchrow_query(self, query, params=None): return await conn.fetchrow(query, *params) else: return await conn.fetchrow(query) + + @asynccontextmanager + async def transaction(self, isolation_level=None): + """ + Async context manager for database transactions. + + Args: + isolation_level: Optional isolation level for the transaction + + Yields: + The connection manager instance for use within the transaction + """ + if not self.pool: + raise ValueError("PostgresConnectionManager is not initialized.") + + async with self.pool.get_connection() as conn: + async with conn.transaction(isolation=isolation_level): + try: + yield self + except Exception as e: + logger.error(f"Transaction failed: {str(e)}") + raise diff --git a/py/core/providers/database/collection.py b/py/core/providers/database/collection.py index e577a29ab..faebc6efb 100644 --- a/py/core/providers/database/collection.py +++ b/py/core/providers/database/collection.py @@ -1,31 +1,32 @@ import json import logging -from datetime import datetime -from typing import Optional, Union +from typing import Any, Optional from uuid import UUID, uuid4 +from asyncpg.exceptions import UniqueViolationError from fastapi import HTTPException from core.base import ( - CollectionHandler, + CollectionsHandler, DatabaseConfig, KGExtractionStatus, R2RException, generate_default_user_collection_id, ) -from core.base.abstractions import DocumentInfo, DocumentType, IngestionStatus -from core.base.api.models import CollectionOverviewResponse, CollectionResponse -from core.utils import ( - generate_collection_id_from_name, - generate_default_user_collection_id, +from core.base.abstractions import ( + DocumentResponse, + DocumentType, + IngestionStatus, ) +from core.base.api.models import CollectionResponse +from core.utils import generate_default_user_collection_id from .base import PostgresConnectionManager logger = logging.getLogger() -class PostgresCollectionHandler(CollectionHandler): +class PostgresCollectionHandler(CollectionsHandler): TABLE_NAME = "collections" def __init__( @@ -40,45 +41,23 @@ def __init__( async def create_tables(self) -> None: query = f""" CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} ( - collection_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + owner_id UUID, name TEXT NOT NULL, description TEXT, - kg_enrichment_status TEXT DEFAULT 'PENDING', + graph_sync_status TEXT DEFAULT 'pending', + graph_cluster_status TEXT DEFAULT 'pending', created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); """ await self.connection_manager.execute_query(query) - async def create_default_collection( - self, user_id: Optional[UUID] = None - ) -> CollectionResponse: - """Create a default collection if it doesn't exist.""" - - if user_id: - default_collection_uuid = generate_default_user_collection_id( - user_id - ) - else: - default_collection_uuid = 
generate_collection_id_from_name( - self.config.default_collection_name - ) - - if not await self.collection_exists(default_collection_uuid): - logger.info("Initializing a new default collection...") - return await self.create_collection( - name=self.config.default_collection_name, - description=self.config.default_collection_description, - collection_id=default_collection_uuid, - ) - - return await self.get_collection(default_collection_uuid) - async def collection_exists(self, collection_id: UUID) -> bool: """Check if a collection exists.""" query = f""" SELECT 1 FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} - WHERE collection_id = $1 + WHERE id = $1 """ result = await self.connection_manager.fetchrow_query( query, [collection_id] @@ -87,59 +66,56 @@ async def collection_exists(self, collection_id: UUID) -> bool: async def create_collection( self, - name: str, + owner_id: UUID, + name: Optional[str] = None, description: str = "", collection_id: Optional[UUID] = None, ) -> CollectionResponse: - current_time = datetime.utcnow() + + if not name and not collection_id: + name = self.config.default_collection_name + collection_id = generate_default_user_collection_id(owner_id) + query = f""" - INSERT INTO {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} (collection_id, name, description, created_at, updated_at) - VALUES ($1, $2, $3, $4, $5) - RETURNING collection_id, name, description, created_at, updated_at + INSERT INTO {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} + (id, owner_id, name, description) + VALUES ($1, $2, $3, $4) + RETURNING id, owner_id, name, description, graph_sync_status, graph_cluster_status, created_at, updated_at """ params = [ collection_id or uuid4(), + owner_id, name, description, - current_time, - current_time, ] - result = await self.connection_manager.fetchrow_query(query, params) - if not result: - raise R2RException(status_code=404, message="Collection not found") - - return CollectionResponse( - collection_id=result["collection_id"], - name=result["name"], - description=result["description"], - created_at=result["created_at"], - updated_at=result["updated_at"], - ) - - async def get_collection(self, collection_id: UUID) -> CollectionResponse: - """Get a collection by its ID.""" - if not await self.collection_exists(collection_id): - raise R2RException(status_code=404, message="Collection not found") - - query = f""" - SELECT collection_id, name, description, created_at, updated_at - FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} - WHERE collection_id = $1 - """ - result = await self.connection_manager.fetchrow_query( - query, [collection_id] - ) - if not result: - raise R2RException(status_code=404, message="Collection not found") + try: + result = await self.connection_manager.fetchrow_query( + query=query, + params=params, + ) + if not result: + raise R2RException( + status_code=404, message="Collection not found" + ) - return CollectionResponse( - collection_id=result["collection_id"], - name=result["name"], - description=result["description"], - created_at=result["created_at"], - updated_at=result["updated_at"], - ) + return CollectionResponse( + id=result["id"], + owner_id=result["owner_id"], + name=result["name"], + description=result["description"], + graph_cluster_status=result["graph_cluster_status"], + graph_sync_status=result["graph_sync_status"], + created_at=result["created_at"], + updated_at=result["updated_at"], + user_count=0, + document_count=0, + ) + except UniqueViolationError: + 
raise R2RException( + message="Collection with this ID already exists", + status_code=409, + ) async def update_collection( self, @@ -153,14 +129,17 @@ async def update_collection( update_fields = [] params: list = [] + param_index = 1 if name is not None: - update_fields.append("name = $1") + update_fields.append(f"name = ${param_index}") params.append(name) + param_index += 1 if description is not None: - update_fields.append("description = ${}".format(len(params) + 1)) + update_fields.append(f"description = ${param_index}") params.append(description) + param_index += 1 if not update_fields: raise R2RException(status_code=400, message="No fields to update") @@ -169,28 +148,49 @@ async def update_collection( params.append(collection_id) query = f""" - UPDATE {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} - SET {', '.join(update_fields)} - WHERE collection_id = ${len(params)} - RETURNING collection_id, name, description, created_at, updated_at + WITH updated_collection AS ( + UPDATE {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} + SET {', '.join(update_fields)} + WHERE id = ${param_index} + RETURNING id, owner_id, name, description, graph_sync_status, graph_cluster_status, created_at, updated_at + ) + SELECT + uc.*, + COUNT(DISTINCT u.id) FILTER (WHERE u.id IS NOT NULL) as user_count, + COUNT(DISTINCT d.id) FILTER (WHERE d.id IS NOT NULL) as document_count + FROM updated_collection uc + LEFT JOIN {self._get_table_name('users')} u ON uc.id = ANY(u.collection_ids) + LEFT JOIN {self._get_table_name('documents')} d ON uc.id = ANY(d.collection_ids) + GROUP BY uc.id, uc.owner_id, uc.name, uc.description, uc.graph_sync_status, uc.graph_cluster_status, uc.created_at, uc.updated_at """ + try: + result = await self.connection_manager.fetchrow_query( + query, params + ) + if not result: + raise R2RException( + status_code=404, message="Collection not found" + ) - result = await self.connection_manager.fetchrow_query(query, params) - if not result: - raise R2RException(status_code=404, message="Collection not found") - - return CollectionResponse( - collection_id=result["collection_id"], - name=result["name"], - description=result["description"], - created_at=result["created_at"], - updated_at=result["updated_at"], - ) + return CollectionResponse( + id=result["id"], + owner_id=result["owner_id"], + name=result["name"], + description=result["description"], + graph_sync_status=result["graph_sync_status"], + graph_cluster_status=result["graph_cluster_status"], + created_at=result["created_at"], + updated_at=result["updated_at"], + user_count=result["user_count"], + document_count=result["document_count"], + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An error occurred while updating the collection: {e}", + ) async def delete_collection_relational(self, collection_id: UUID) -> None: - # async with self.connection_manager.pool.get_connection() as conn: # type: ignore - # async with conn.transaction(): - # try: # Remove collection_id from users user_update_query = f""" UPDATE {self._get_table_name('users')} @@ -204,7 +204,7 @@ async def delete_collection_relational(self, collection_id: UUID) -> None: # Remove collection_id from documents document_update_query = f""" WITH updated AS ( - UPDATE {self._get_table_name('document_info')} + UPDATE {self._get_table_name('documents')} SET collection_ids = array_remove(collection_ids, $1) WHERE $1 = ANY(collection_ids) RETURNING 1 @@ -218,8 +218,8 @@ async def delete_collection_relational(self, 
collection_id: UUID) -> None: # Delete the collection delete_query = f""" DELETE FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} - WHERE collection_id = $1 - RETURNING collection_id + WHERE id = $1 + RETURNING id """ deleted = await self.connection_manager.fetchrow_query( delete_query, [collection_id] @@ -228,71 +228,9 @@ async def delete_collection_relational(self, collection_id: UUID) -> None: if not deleted: raise R2RException(status_code=404, message="Collection not found") - async def list_collections( - self, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[CollectionResponse], int]]: - """List collections with pagination.""" - query = f""" - SELECT collection_id, name, description, created_at, updated_at, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} - ORDER BY name - OFFSET $1 - """ - - conditions = [offset] - if limit != -1: - query += " LIMIT $2" - conditions.append(limit) - - results = await self.connection_manager.fetch_query(query, conditions) - if not results: - logger.info("No collections found.") - return {"results": [], "total_entries": 0} - - collections = [ - CollectionResponse( - collection_id=row["collection_id"], - name=row["name"], - description=row["description"], - created_at=row["created_at"], - updated_at=row["updated_at"], - ) - for row in results - ] - total_entries = results[0]["total_entries"] if results else 0 - - return {"results": collections, "total_entries": total_entries} - - async def get_collections_by_ids( - self, collection_ids: list[UUID] - ) -> list[CollectionResponse]: - query = f""" - SELECT collection_id, name, description, created_at, updated_at - FROM {self._get_table_name("collections")} - WHERE collection_id = ANY($1) - """ - results = await self.connection_manager.fetch_query( - query, [collection_ids] - ) - if len(results) != len(collection_ids): - raise R2RException( - status_code=404, - message=f"These collections were not found: {set(collection_ids) - {row['collection_id'] for row in results}}", - ) - return [ - CollectionResponse( - collection_id=row["collection_id"], - name=row["name"], - description=row["description"], - created_at=row["created_at"], - updated_at=row["updated_at"], - ) - for row in results - ] - async def documents_in_collection( - self, collection_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[DocumentInfo], int]]: + self, collection_id: UUID, offset: int, limit: int + ) -> dict[str, list[DocumentResponse] | int]: """ Get all documents in a specific collection with pagination. Args: @@ -300,17 +238,17 @@ async def documents_in_collection( offset (int): The number of documents to skip. limit (int): The maximum number of documents to return. Returns: - List[DocumentInfo]: A list of DocumentInfo objects representing the documents in the collection. + List[DocumentResponse]: A list of DocumentResponse objects representing the documents in the collection. Raises: R2RException: If the collection doesn't exist. 
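        Note: per the return annotation, the documents are wrapped in a dict together with a total_entries count.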
""" if not await self.collection_exists(collection_id): raise R2RException(status_code=404, message="Collection not found") query = f""" - SELECT d.document_id, d.user_id, d.type, d.metadata, d.title, d.version, - d.size_in_bytes, d.ingestion_status, d.kg_extraction_status, d.created_at, d.updated_at, + SELECT d.id, d.owner_id, d.type, d.metadata, d.title, d.version, + d.size_in_bytes, d.ingestion_status, d.extraction_status, d.created_at, d.updated_at, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('document_info')} d + FROM {self._get_table_name('documents')} d WHERE $1 = ANY(d.collection_ids) ORDER BY d.created_at DESC OFFSET $2 @@ -323,19 +261,17 @@ async def documents_in_collection( results = await self.connection_manager.fetch_query(query, conditions) documents = [ - DocumentInfo( - id=row["document_id"], + DocumentResponse( + id=row["id"], collection_ids=[collection_id], - user_id=row["user_id"], + owner_id=row["owner_id"], document_type=DocumentType(row["type"]), metadata=json.loads(row["metadata"]), title=row["title"], version=row["version"], size_in_bytes=row["size_in_bytes"], ingestion_status=IngestionStatus(row["ingestion_status"]), - kg_extraction_status=KGExtractionStatus( - row["kg_extraction_status"] - ), + extraction_status=KGExtractionStatus(row["extraction_status"]), created_at=row["created_at"], updated_at=row["updated_at"], ) @@ -347,95 +283,101 @@ async def documents_in_collection( async def get_collections_overview( self, - collection_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict[str, Union[list[CollectionOverviewResponse], int]]: - """Get an overview of collections, optionally filtered by collection IDs, with pagination.""" + offset: int, + limit: int, + filter_user_ids: Optional[list[UUID]] = None, + filter_document_ids: Optional[list[UUID]] = None, + filter_collection_ids: Optional[list[UUID]] = None, + ) -> dict[str, list[CollectionResponse] | int]: + conditions = [] + params: list[Any] = [] + param_index = 1 + + # Build JOIN clauses based on filters + document_join = "JOIN" if filter_document_ids else "LEFT JOIN" + user_join = "JOIN" if filter_user_ids else "LEFT JOIN" + + if filter_user_ids: + conditions.append(f"u.id = ANY(${param_index})") + params.append(filter_user_ids) + param_index += 1 + + if filter_document_ids: + conditions.append(f"d.id = ANY(${param_index})") + params.append(filter_document_ids) + param_index += 1 + + if filter_collection_ids: + conditions.append(f"c.id = ANY(${param_index})") + params.append(filter_collection_ids) + param_index += 1 + + where_clause = ( + f"WHERE {' AND '.join(conditions)}" if conditions else "" + ) + query = f""" - WITH collection_overview AS ( - SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, g.kg_enrichment_status, - COUNT(DISTINCT u.user_id) AS user_count, - COUNT(DISTINCT d.document_id) AS document_count - FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} g - LEFT JOIN {self._get_table_name('users')} u ON g.collection_id = ANY(u.collection_ids) - LEFT JOIN {self._get_table_name('document_info')} d ON g.collection_id = ANY(d.collection_ids) - {' WHERE g.collection_id = ANY($1)' if collection_ids else ''} - GROUP BY g.collection_id, g.name, g.description, g.created_at, g.updated_at, g.kg_enrichment_status - ), - counted_overview AS ( - SELECT *, COUNT(*) OVER() AS total_entries - FROM collection_overview + WITH collection_stats AS ( + SELECT + c.id, + c.owner_id, + c.name, + c.description, + c.created_at, + 
c.updated_at, + c.graph_sync_status, + c.graph_cluster_status, + COUNT(DISTINCT u.id) FILTER (WHERE u.id IS NOT NULL) as user_count, + COUNT(DISTINCT d.id) FILTER (WHERE d.id IS NOT NULL) as document_count + FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} c + {user_join} {self._get_table_name('users')} u ON c.id = ANY(u.collection_ids) + {document_join} {self._get_table_name('documents')} d ON c.id = ANY(d.collection_ids) + {where_clause} + GROUP BY c.id, c.owner_id, c.name, c.description, c.created_at, c.updated_at, c.graph_cluster_status ) - SELECT * FROM counted_overview - ORDER BY name - OFFSET ${2 if collection_ids else 1} - {f'LIMIT ${3 if collection_ids else 2}' if limit != -1 else ''} + SELECT + *, + COUNT(*) OVER() AS total_entries + FROM collection_stats + ORDER BY created_at DESC + OFFSET ${param_index} """ - - params: list = [] - if collection_ids: - params.append(collection_ids) params.append(offset) - if limit != -1: - params.append(limit) - - results = await self.connection_manager.fetch_query(query, params) + param_index += 1 - if not results: - logger.info("No collections found.") - return {"results": [], "total_entries": 0} - - collections = [ - CollectionOverviewResponse( - collection_id=row["collection_id"], - name=row["name"], - description=row["description"], - created_at=row["created_at"], - updated_at=row["updated_at"], - user_count=row["user_count"], - document_count=row["document_count"], - kg_enrichment_status=row["kg_enrichment_status"], - ) - for row in results - ] - - total_entries = results[0]["total_entries"] if results else 0 - - return {"results": collections, "total_entries": total_entries} - - async def get_collections_for_user( - self, user_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[CollectionResponse], int]]: - query = f""" - SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} g - JOIN {self._get_table_name('users')} u ON g.collection_id = ANY(u.collection_ids) - WHERE u.user_id = $1 - ORDER BY g.name - OFFSET $2 - """ - - params = [user_id, offset] if limit != -1: - query += " LIMIT $3" + query += f" LIMIT ${param_index}" params.append(limit) - results = await self.connection_manager.fetch_query(query, params) + try: + results = await self.connection_manager.fetch_query(query, params) + if not results: + return {"results": [], "total_entries": 0} + + total_entries = results[0]["total_entries"] if results else 0 + + collections = [ + CollectionResponse( + id=row["id"], + owner_id=row["owner_id"], + name=row["name"], + description=row["description"], + graph_sync_status=row["graph_sync_status"], + graph_cluster_status=row["graph_cluster_status"], + created_at=row["created_at"], + updated_at=row["updated_at"], + user_count=row["user_count"], + document_count=row["document_count"], + ) + for row in results + ] - collections = [ - CollectionResponse( - collection_id=row["collection_id"], - name=row["name"], - description=row["description"], - created_at=row["created_at"], - updated_at=row["updated_at"], + return {"results": collections, "total_entries": total_entries} + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An error occurred while fetching collections: {e}", ) - for row in results - ] - total_entries = results[0]["total_entries"] if results else 0 - - return {"results": collections, "total_entries": total_entries} async def 
assign_document_to_collection_relational( self, @@ -461,8 +403,8 @@ async def assign_document_to_collection_relational( # First, check if the document exists document_check_query = f""" - SELECT 1 FROM {self._get_table_name('document_info')} - WHERE document_id = $1 + SELECT 1 FROM {self._get_table_name('documents')} + WHERE id = $1 """ document_exists = await self.connection_manager.fetchrow_query( document_check_query, [document_id] @@ -475,10 +417,10 @@ async def assign_document_to_collection_relational( # If document exists, proceed with the assignment assign_query = f""" - UPDATE {self._get_table_name('document_info')} + UPDATE {self._get_table_name('documents')} SET collection_ids = array_append(collection_ids, $1) - WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids)) - RETURNING document_id + WHERE id = $2 AND NOT ($1 = ANY(collection_ids)) + RETURNING id """ result = await self.connection_manager.fetchrow_query( assign_query, [collection_id, document_id] @@ -502,40 +444,6 @@ async def assign_document_to_collection_relational( detail=f"An error '{e}' occurred while assigning the document to the collection", ) - async def document_collections( - self, document_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[CollectionResponse], int]]: - query = f""" - SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} g - JOIN {self._get_table_name('document_info')} d ON g.collection_id = ANY(d.collection_ids) - WHERE d.document_id = $1 - ORDER BY g.name - OFFSET $2 - """ - - conditions: list = [document_id, offset] - if limit != -1: - query += " LIMIT $3" - conditions.append(limit) - - results = await self.connection_manager.fetch_query(query, conditions) - - collections = [ - CollectionResponse( - collection_id=row["collection_id"], - name=row["name"], - description=row["description"], - created_at=row["created_at"], - updated_at=row["updated_at"], - ) - for row in results - ] - - total_entries = results[0]["total_entries"] if results else 0 - - return {"results": collections, "total_entries": total_entries} - async def remove_document_from_collection_relational( self, document_id: UUID, collection_id: UUID ) -> None: @@ -553,10 +461,10 @@ async def remove_document_from_collection_relational( raise R2RException(status_code=404, message="Collection not found") query = f""" - UPDATE {self._get_table_name('document_info')} + UPDATE {self._get_table_name('documents')} SET collection_ids = array_remove(collection_ids, $1) - WHERE document_id = $2 AND $1 = ANY(collection_ids) - RETURNING document_id + WHERE id = $2 AND $1 = ANY(collection_ids) + RETURNING id """ result = await self.connection_manager.fetchrow_query( query, [collection_id, document_id] diff --git a/py/core/providers/database/document.py b/py/core/providers/database/document.py index a883831f6..3504c43d7 100644 --- a/py/core/providers/database/document.py +++ b/py/core/providers/database/document.py @@ -2,7 +2,7 @@ import copy import json import logging -from typing import Any, Optional, Union +from typing import Any, Optional from uuid import UUID import asyncpg @@ -10,7 +10,7 @@ from core.base import ( DocumentHandler, - DocumentInfo, + DocumentResponse, DocumentType, IngestionStatus, KGEnrichmentStatus, @@ -25,11 +25,11 @@ class PostgresDocumentHandler(DocumentHandler): - TABLE_NAME = "document_info" + TABLE_NAME = "documents" COLUMN_VARS = [ "extraction_id", - "document_id", - 
"user_id", + "id", + "owner_id", "collection_ids", ] @@ -49,9 +49,9 @@ async def create_tables(self): try: query = f""" CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} ( - document_id UUID PRIMARY KEY, + id UUID PRIMARY KEY, collection_ids UUID[], - user_id UUID, + owner_id UUID, type TEXT, metadata JSONB, title TEXT, @@ -60,11 +60,11 @@ async def create_tables(self): version TEXT, size_in_bytes INT, ingestion_status TEXT DEFAULT 'pending', - kg_extraction_status TEXT DEFAULT 'pending', + extraction_status TEXT DEFAULT 'pending', created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW(), ingestion_attempt_number INT DEFAULT 0, - doc_search_vector tsvector GENERATED ALWAYS AS ( + raw_tsvector tsvector GENERATED ALWAYS AS ( setweight(to_tsvector('english', COALESCE(title, '')), 'A') || setweight(to_tsvector('english', COALESCE(summary, '')), 'B') || setweight(to_tsvector('english', COALESCE((metadata->>'description')::text, '')), 'C') @@ -76,21 +76,21 @@ async def create_tables(self): -- Full text search index CREATE INDEX IF NOT EXISTS idx_doc_search_{self.project_name} ON {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} - USING GIN (doc_search_vector); + USING GIN (raw_tsvector); """ await self.connection_manager.execute_query(query) except Exception as e: logger.warning(f"Error {e} when creating document table.") async def upsert_documents_overview( - self, documents_overview: Union[DocumentInfo, list[DocumentInfo]] + self, documents_overview: DocumentResponse | list[DocumentResponse] ) -> None: - if isinstance(documents_overview, DocumentInfo): + if isinstance(documents_overview, DocumentResponse): documents_overview = [documents_overview] # TODO: make this an arg max_retries = 20 - for document_info in documents_overview: + for document in documents_overview: retries = 0 while retries < max_retries: try: @@ -99,13 +99,13 @@ async def upsert_documents_overview( # Lock the row for update check_query = f""" SELECT ingestion_attempt_number, ingestion_status FROM {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} - WHERE document_id = $1 FOR UPDATE + WHERE id = $1 FOR UPDATE """ existing_doc = await conn.fetchrow( - check_query, document_info.id + check_query, document.id ) - db_entry = document_info.convert_to_db_entry() + db_entry = document.convert_to_db_entry() if existing_doc: db_version = existing_doc[ @@ -132,50 +132,51 @@ async def upsert_documents_overview( update_query = f""" UPDATE {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} - SET collection_ids = $1, user_id = $2, type = $3, metadata = $4, + SET collection_ids = $1, owner_id = $2, type = $3, metadata = $4, title = $5, version = $6, size_in_bytes = $7, ingestion_status = $8, - kg_extraction_status = $9, updated_at = $10, ingestion_attempt_number = $11, + extraction_status = $9, updated_at = $10, ingestion_attempt_number = $11, summary = $12, summary_embedding = $13 - WHERE document_id = $14 + WHERE id = $14 """ + await conn.execute( update_query, db_entry["collection_ids"], - db_entry["user_id"], + db_entry["owner_id"], db_entry["document_type"], db_entry["metadata"], db_entry["title"], db_entry["version"], db_entry["size_in_bytes"], db_entry["ingestion_status"], - db_entry["kg_extraction_status"], + db_entry["extraction_status"], db_entry["updated_at"], new_attempt_number, db_entry["summary"], db_entry["summary_embedding"], - document_info.id, + document.id, ) else: insert_query = f""" INSERT INTO 
{self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} - (document_id, collection_ids, user_id, type, metadata, title, version, - size_in_bytes, ingestion_status, kg_extraction_status, created_at, + (id, collection_ids, owner_id, type, metadata, title, version, + size_in_bytes, ingestion_status, extraction_status, created_at, updated_at, ingestion_attempt_number, summary, summary_embedding) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15) """ await conn.execute( insert_query, - db_entry["document_id"], + db_entry["id"], db_entry["collection_ids"], - db_entry["user_id"], + db_entry["owner_id"], db_entry["document_type"], db_entry["metadata"], db_entry["title"], db_entry["version"], db_entry["size_in_bytes"], db_entry["ingestion_status"], - db_entry["kg_extraction_status"], + db_entry["extraction_status"], db_entry["created_at"], db_entry["updated_at"], db_entry["ingestion_attempt_number"], @@ -191,34 +192,28 @@ async def upsert_documents_overview( retries += 1 if retries == max_retries: logger.error( - f"Failed to update document {document_info.id} after {max_retries} attempts. Error: {str(e)}" + f"Failed to update document {document.id} after {max_retries} attempts. Error: {str(e)}" ) raise else: wait_time = 0.1 * (2**retries) # Exponential backoff await asyncio.sleep(wait_time) - except Exception as e: - if 'column "summary"' in str(e): - raise ValueError( - "Document schema is missing 'summary' and 'summary_embedding' columns. Call `r2r db upgrade` to carry out the necessary migration." - ) - raise async def delete_from_documents_overview( self, document_id: UUID, version: Optional[str] = None ) -> None: query = f""" DELETE FROM {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} - WHERE document_id = $1 + WHERE id = $1 """ params = [str(document_id)] if version: query += " AND version = $2" - params = [str(document_id), version] + params.append(version) - await self.connection_manager.execute_query(query, params) + await self.connection_manager.execute_query(query=query, params=params) async def _get_status_from_table( self, @@ -263,14 +258,13 @@ async def _get_ids_from_table( status_type (str): The type of status to retrieve. """ query = f""" - SELECT document_id FROM {self._get_table_name(table_name)} + SELECT id FROM {self._get_table_name(table_name)} WHERE {status_type} = ANY($1) and $2 = ANY(collection_ids) """ records = await self.connection_manager.fetch_query( query, [status, collection_id] ) - document_ids = [record["document_id"] for record in records] - return document_ids + return [record["id"] for record in records] async def _set_status_in_table( self, @@ -309,9 +303,11 @@ def _get_status_model(self, status_type: str): """ if status_type == "ingestion": return IngestionStatus - elif status_type == "kg_extraction_status": + elif status_type == "extraction_status": return KGExtractionStatus - elif status_type == "kg_enrichment_status": + elif status_type == "graph_cluster_status": + return KGEnrichmentStatus + elif status_type == "graph_sync_status": return KGEnrichmentStatus else: raise R2RException( @@ -319,7 +315,7 @@ def _get_status_model(self, status_type: str): ) async def get_workflow_status( - self, id: Union[UUID, list[UUID]], status_type: str + self, id: UUID | list[UUID], status_type: str ): """ Get the workflow status for a given document or list of documents. 
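
The `upsert_documents_overview` change above keeps the existing retry strategy: each attempt locks the row with `SELECT ... FOR UPDATE`, and a failed attempt backs off exponentially before trying again. A minimal standalone sketch of that retry shape, where `do_upsert`, `upsert_with_backoff`, and the retry bounds are illustrative stand-ins rather than the handler's actual API:

```python
import asyncio
import logging
from typing import Awaitable, Callable

logger = logging.getLogger()


async def upsert_with_backoff(
    do_upsert: Callable[[], Awaitable[None]],
    max_retries: int = 20,
    base_delay: float = 0.1,
) -> None:
    """Run an upsert callable, retrying with exponential backoff on failure."""
    for attempt in range(1, max_retries + 1):
        try:
            # In the handler this step is the locked read (SELECT ... FOR UPDATE)
            # followed by either an UPDATE or an INSERT inside one transaction.
            await do_upsert()
            return
        except Exception as exc:
            if attempt == max_retries:
                logger.error(f"Upsert failed after {max_retries} attempts: {exc}")
                raise
            await asyncio.sleep(base_delay * (2**attempt))
```

With `base_delay = 0.1` the waits grow as 0.2 s, 0.4 s, 0.8 s, ..., matching the `0.1 * (2**retries)` backoff used in the handler.
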
@@ -345,7 +341,7 @@ async def get_workflow_status( return result[0] if isinstance(id, UUID) else result async def set_workflow_status( - self, id: Union[UUID, list[UUID]], status_type: str, status: str + self, id: UUID | list[UUID], status_type: str, status: str ): """ Set the workflow status for a given document or list of documents. @@ -369,7 +365,7 @@ async def set_workflow_status( async def get_document_ids_by_status( self, status_type: str, - status: Union[str, list[str]], + status: str | list[str], collection_id: Optional[UUID] = None, ): """ @@ -385,30 +381,29 @@ async def get_document_ids_by_status( status = [status] out_model = self._get_status_model(status_type) - result = await self._get_ids_from_table( + return await self._get_ids_from_table( status, out_model.table_name(), status_type, collection_id ) - return result async def get_documents_overview( self, + offset: int, + limit: int, filter_user_ids: Optional[list[UUID]] = None, filter_document_ids: Optional[list[UUID]] = None, filter_collection_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = -1, ) -> dict[str, Any]: conditions = [] params: list[Any] = [] param_index = 1 if filter_document_ids: - conditions.append(f"document_id = ANY(${param_index})") + conditions.append(f"id = ANY(${param_index})") params.append(filter_document_ids) param_index += 1 if filter_user_ids: - conditions.append(f"user_id = ANY(${param_index})") + conditions.append(f"owner_id = ANY(${param_index})") params.append(filter_user_ids) param_index += 1 @@ -423,49 +418,13 @@ async def get_documents_overview( if conditions: base_query += " WHERE " + " AND ".join(conditions) - - # query = f""" - # SELECT document_id, collection_ids, user_id, type, metadata, title, version, - # size_in_bytes, ingestion_status, kg_extraction_status, created_at, updated_at, - # summary, summary_embedding, - # COUNT(*) OVER() AS total_entries - # {base_query} - # ORDER BY created_at DESC - # OFFSET ${param_index} - # """ - - # First check if the new columns exist - try: - check_query = f""" - SELECT EXISTS ( - SELECT 1 - FROM information_schema.columns - WHERE table_name = '{self._get_table_name(PostgresDocumentHandler.TABLE_NAME)}' - AND column_name = 'summary' - ); - """ - has_new_columns = await self.connection_manager.fetch_query( - check_query - ) - has_new_columns = has_new_columns[0]["exists"] - except Exception as e: - logger.warning(f"Error checking for new columns: {e}") - has_new_columns = False - # Construct the SELECT part of the query based on column existence - if has_new_columns: - select_fields = """ - SELECT document_id, collection_ids, user_id, type, metadata, title, version, - size_in_bytes, ingestion_status, kg_extraction_status, created_at, updated_at, - summary, summary_embedding, - COUNT(*) OVER() AS total_entries - """ - else: - select_fields = """ - SELECT document_id, collection_ids, user_id, type, metadata, title, version, - size_in_bytes, ingestion_status, kg_extraction_status, created_at, updated_at, - COUNT(*) OVER() AS total_entries - """ + select_fields = """ + SELECT id, collection_ids, owner_id, type, metadata, title, version, + size_in_bytes, ingestion_status, extraction_status, created_at, updated_at, + summary, summary_embedding, + COUNT(*) OVER() AS total_entries + """ query = f""" {select_fields} @@ -506,14 +465,14 @@ async def get_documents_overview( ] except Exception as e: logger.warning( - f"Failed to parse embedding for document {row['document_id']}: {e}" + f"Failed to parse embedding for document {row['id']}: 
{e}" ) documents.append( - DocumentInfo( - id=row["document_id"], + DocumentResponse( + id=row["id"], collection_ids=row["collection_ids"], - user_id=row["user_id"], + owner_id=row["owner_id"], document_type=DocumentType(row["type"]), metadata=json.loads(row["metadata"]), title=row["title"], @@ -522,8 +481,8 @@ async def get_documents_overview( ingestion_status=IngestionStatus( row["ingestion_status"] ), - kg_extraction_status=KGExtractionStatus( - row["kg_extraction_status"] + extraction_status=KGExtractionStatus( + row["extraction_status"] ), created_at=row["created_at"], updated_at=row["updated_at"], @@ -541,41 +500,34 @@ async def get_documents_overview( async def semantic_document_search( self, query_embedding: list[float], search_settings: SearchSettings - ) -> list[DocumentInfo]: + ) -> list[DocumentResponse]: """Search documents using semantic similarity with their summary embeddings.""" where_clauses = ["summary_embedding IS NOT NULL"] params: list[str | int | bytes] = [str(query_embedding)] # Handle filters - if search_settings.search_filters: + if search_settings.filters: filter_clause = self._build_filters( - search_settings.search_filters, params + search_settings.filters, params ) where_clauses.append(filter_clause) - # Handle collection filtering - if search_settings.selected_collection_ids: - where_clauses.append("collection_ids && $" + str(len(params) + 1)) - params.append( - [str(ele) for ele in search_settings.selected_collection_ids] # type: ignore - ) - where_clause = " AND ".join(where_clauses) query = f""" WITH document_scores AS ( SELECT - document_id, + id, collection_ids, - user_id, + owner_id, type, metadata, title, version, size_in_bytes, ingestion_status, - kg_extraction_status, + extraction_status, created_at, updated_at, summary, @@ -592,15 +544,15 @@ async def semantic_document_search( FROM document_scores """ - params.extend([search_settings.search_limit, search_settings.offset]) + params.extend([search_settings.limit, search_settings.offset]) results = await self.connection_manager.fetch_query(query, params) return [ - DocumentInfo( - id=row["document_id"], + DocumentResponse( + id=row["id"], collection_ids=row["collection_ids"], - user_id=row["user_id"], + owner_id=row["owner_id"], document_type=DocumentType(row["type"]), metadata={ **( @@ -615,9 +567,7 @@ async def semantic_document_search( version=row["version"], size_in_bytes=row["size_in_bytes"], ingestion_status=IngestionStatus(row["ingestion_status"]), - kg_extraction_status=KGExtractionStatus( - row["kg_extraction_status"] - ), + extraction_status=KGExtractionStatus(row["extraction_status"]), created_at=row["created_at"], updated_at=row["updated_at"], summary=row["summary"], @@ -632,46 +582,39 @@ async def semantic_document_search( async def full_text_document_search( self, query_text: str, search_settings: SearchSettings - ) -> list[DocumentInfo]: + ) -> list[DocumentResponse]: """Enhanced full-text search using generated tsvector.""" - where_clauses = [ - "doc_search_vector @@ websearch_to_tsquery('english', $1)" - ] + where_clauses = ["raw_tsvector @@ websearch_to_tsquery('english', $1)"] params: list[str | int | bytes] = [query_text] # Handle filters - if search_settings.search_filters: + if search_settings.filters: filter_clause = self._build_filters( - search_settings.search_filters, params + search_settings.filters, params ) where_clauses.append(filter_clause) - # Handle collection filtering - if search_settings.selected_collection_ids: - where_clauses.append("collection_ids && $" + 
str(len(params) + 1)) - params.append([str(ele) for ele in search_settings.selected_collection_ids]) # type: ignore - where_clause = " AND ".join(where_clauses) query = f""" WITH document_scores AS ( SELECT - document_id, + id, collection_ids, - user_id, + owner_id, type, metadata, title, version, size_in_bytes, ingestion_status, - kg_extraction_status, + extraction_status, created_at, updated_at, summary, summary_embedding, - ts_rank_cd(doc_search_vector, websearch_to_tsquery('english', $1), 32) as text_score + ts_rank_cd(raw_tsvector, websearch_to_tsquery('english', $1), 32) as text_score FROM {self._get_table_name(PostgresDocumentHandler.TABLE_NAME)} WHERE {where_clause} ORDER BY text_score DESC @@ -681,15 +624,15 @@ async def full_text_document_search( SELECT * FROM document_scores """ - params.extend([search_settings.search_limit, search_settings.offset]) + params.extend([search_settings.limit, search_settings.offset]) results = await self.connection_manager.fetch_query(query, params) return [ - DocumentInfo( - id=row["document_id"], + DocumentResponse( + id=row["id"], collection_ids=row["collection_ids"], - user_id=row["user_id"], + owner_id=row["owner_id"], document_type=DocumentType(row["type"]), metadata={ **( @@ -704,9 +647,7 @@ async def full_text_document_search( version=row["version"], size_in_bytes=row["size_in_bytes"], ingestion_status=IngestionStatus(row["ingestion_status"]), - kg_extraction_status=KGExtractionStatus( - row["kg_extraction_status"] - ), + extraction_status=KGExtractionStatus(row["extraction_status"]), created_at=row["created_at"], updated_at=row["updated_at"], summary=row["summary"], @@ -728,12 +669,12 @@ async def hybrid_document_search( query_text: str, query_embedding: list[float], search_settings: SearchSettings, - ) -> list[DocumentInfo]: + ) -> list[DocumentResponse]: """Search documents using both semantic and full-text search with RRF fusion.""" # Get more results than needed for better fusion extended_settings = copy.deepcopy(search_settings) - extended_settings.search_limit = search_settings.search_limit * 3 + extended_settings.limit = search_settings.limit * 3 # Get results from both search methods semantic_results = await self.semantic_document_search( @@ -770,15 +711,11 @@ async def hybrid_document_search( } # Calculate RRF scores using hybrid search settings - rrf_k = search_settings.hybrid_search_settings.rrf_k - semantic_weight = ( - search_settings.hybrid_search_settings.semantic_weight - ) - full_text_weight = ( - search_settings.hybrid_search_settings.full_text_weight - ) + rrf_k = search_settings.hybrid_settings.rrf_k + semantic_weight = search_settings.hybrid_settings.semantic_weight + full_text_weight = search_settings.hybrid_settings.full_text_weight - for doc_id, scores in doc_scores.items(): + for scores in doc_scores.values(): semantic_score = 1 / (rrf_k + scores["semantic_rank"]) full_text_score = 1 / (rrf_k + scores["full_text_rank"]) @@ -795,11 +732,11 @@ async def hybrid_document_search( doc_scores.values(), key=lambda x: x["final_score"], reverse=True )[ search_settings.offset : search_settings.offset - + search_settings.search_limit + + search_settings.limit ] return [ - DocumentInfo( + DocumentResponse( **{ **result["data"].__dict__, "metadata": { @@ -823,14 +760,17 @@ async def search_documents( query_text: str, query_embedding: Optional[list[float]] = None, search_settings: Optional[SearchSettings] = None, - ) -> list[DocumentInfo]: + ) -> list[DocumentResponse]: """ Main search method that delegates to the appropriate 
search method based on settings. """ if search_settings is None: search_settings = SearchSettings() - if search_settings.use_hybrid_search: + if ( + search_settings.use_semantic_search + and search_settings.use_fulltext_search + ) or search_settings.use_hybrid_search: if query_embedding is None: raise ValueError( "query_embedding is required for hybrid search" @@ -838,7 +778,7 @@ async def search_documents( return await self.hybrid_document_search( query_text, query_embedding, search_settings ) - elif search_settings.use_vector_search: + elif search_settings.use_semantic_search: if query_embedding is None: raise ValueError( "query_embedding is required for vector search" @@ -853,7 +793,7 @@ async def search_documents( # TODO - Remove copy pasta, consolidate def _build_filters( - self, filters: dict, parameters: list[Union[str, int, bytes]] + self, filters: dict, parameters: list[str | int | bytes] ) -> str: def parse_condition(key: str, value: Any) -> str: # type: ignore diff --git a/py/core/providers/database/file.py b/py/core/providers/database/file.py index df5496303..e6ec4cff6 100644 --- a/py/core/providers/database/file.py +++ b/py/core/providers/database/file.py @@ -16,23 +16,25 @@ class PostgresFileHandler(FileHandler): """PostgreSQL implementation of the FileHandler.""" + TABLE_NAME = "files" + connection_manager: PostgresConnectionManager async def create_tables(self) -> None: """Create the necessary tables for file storage.""" query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name('file_storage')} ( + CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresFileHandler.TABLE_NAME)} ( document_id UUID PRIMARY KEY, - file_name TEXT NOT NULL, - file_oid OID NOT NULL, - file_size BIGINT NOT NULL, - file_type TEXT, + name TEXT NOT NULL, + oid OID NOT NULL, + size BIGINT NOT NULL, + type TEXT, created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); -- Create trigger for updating the updated_at timestamp - CREATE OR REPLACE FUNCTION {self.project_name}.update_file_storage_updated_at() + CREATE OR REPLACE FUNCTION {self.project_name}.update_files_updated_at() RETURNS TRIGGER AS $$ BEGIN NEW.updated_at = CURRENT_TIMESTAMP; @@ -40,13 +42,13 @@ async def create_tables(self) -> None: END; $$ LANGUAGE plpgsql; - DROP TRIGGER IF EXISTS update_file_storage_updated_at - ON {self._get_table_name('file_storage')}; + DROP TRIGGER IF EXISTS update_files_updated_at + ON {self._get_table_name(PostgresFileHandler.TABLE_NAME)}; - CREATE TRIGGER update_file_storage_updated_at - BEFORE UPDATE ON {self._get_table_name('file_storage')} + CREATE TRIGGER update_files_updated_at + BEFORE UPDATE ON {self._get_table_name(PostgresFileHandler.TABLE_NAME)} FOR EACH ROW - EXECUTE FUNCTION {self.project_name}.update_file_storage_updated_at(); + EXECUTE FUNCTION {self.project_name}.update_files_updated_at(); """ await self.connection_manager.execute_query(query) @@ -60,14 +62,14 @@ async def upsert_file( ) -> None: """Add or update a file entry in storage.""" query = f""" - INSERT INTO {self._get_table_name('file_storage')} - (document_id, file_name, file_oid, file_size, file_type) + INSERT INTO {self._get_table_name(PostgresFileHandler.TABLE_NAME)} + (document_id, name, oid, size, type) VALUES ($1, $2, $3, $4, $5) ON CONFLICT (document_id) DO UPDATE SET - file_name = EXCLUDED.file_name, - file_oid = EXCLUDED.file_oid, - file_size = EXCLUDED.file_size, - file_type = EXCLUDED.file_type, + name = EXCLUDED.name, + oid = EXCLUDED.oid, + size = EXCLUDED.size, + type = EXCLUDED.type, 
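+                -- created_at is intentionally left untouched; only the file metadata and updated_at are refreshed on conflict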
updated_at = NOW(); """ await self.connection_manager.execute_query( @@ -82,14 +84,16 @@ async def store_file( file_type: Optional[str] = None, ) -> None: """Store a new file in the database.""" - file_size = file_content.getbuffer().nbytes + size = file_content.getbuffer().nbytes - async with self.connection_manager.pool.get_connection() as conn: # type: ignore + async with ( # type: ignore + self.connection_manager.pool.get_connection() as conn + ): async with conn.transaction(): oid = await conn.fetchval("SELECT lo_create(0)") await self._write_lobject(conn, oid, file_content) await self.upsert_file( - document_id, file_name, oid, file_size, file_type + document_id, file_name, oid, size, file_type ) async def _write_lobject( @@ -101,10 +105,12 @@ async def _write_lobject( try: chunk_size = 8192 # 8 KB chunks while True: - chunk = file_content.read(chunk_size) - if not chunk: + if chunk := file_content.read(chunk_size): + await conn.execute( + "SELECT lowrite($1, $2)", lobject, chunk + ) + else: break - await conn.execute("SELECT lowrite($1, $2)", lobject, chunk) await conn.execute("SELECT lo_close($1)", lobject) @@ -120,8 +126,8 @@ async def retrieve_file( ) -> Optional[tuple[str, BinaryIO, int]]: """Retrieve a file from storage.""" query = f""" - SELECT file_name, file_oid, file_size - FROM {self._get_table_name('file_storage')} + SELECT name, oid, size + FROM {self._get_table_name(PostgresFileHandler.TABLE_NAME)} WHERE document_id = $1 """ @@ -134,15 +140,15 @@ async def retrieve_file( message=f"File for document {document_id} not found", ) - file_name, oid, file_size = ( - result["file_name"], - result["file_oid"], - result["file_size"], + file_name, oid, size = ( + result["name"], + result["oid"], + result["size"], ) async with self.connection_manager.pool.get_connection() as conn: # type: ignore file_content = await self._read_lobject(conn, oid) - return file_name, io.BytesIO(file_content), file_size + return file_name, io.BytesIO(file_content), size async def _read_lobject(self, conn, oid: int) -> bytes: """Read content from a large object.""" @@ -191,7 +197,7 @@ async def _read_lobject(self, conn, oid: int) -> bytes: async def delete_file(self, document_id: UUID) -> bool: """Delete a file from storage.""" query = f""" - SELECT file_oid FROM {self._get_table_name('file_storage')} + SELECT oid FROM {self._get_table_name(PostgresFileHandler.TABLE_NAME)} WHERE document_id = $1 """ @@ -207,7 +213,7 @@ async def delete_file(self, document_id: UUID) -> bool: await self._delete_lobject(conn, oid) delete_query = f""" - DELETE FROM {self._get_table_name('file_storage')} + DELETE FROM {self._get_table_name(PostgresFileHandler.TABLE_NAME)} WHERE document_id = $1 """ await conn.execute(delete_query, document_id) @@ -220,17 +226,17 @@ async def _delete_lobject(self, conn, oid: int) -> None: async def get_files_overview( self, + offset: int, + limit: int, filter_document_ids: Optional[list[UUID]] = None, filter_file_names: Optional[list[str]] = None, - offset: int = 0, - limit: int = 100, ) -> list[dict]: """Get an overview of stored files.""" conditions = [] params: list[Union[str, list[str], int]] = [] query = f""" - SELECT document_id, file_name, file_oid, file_size, file_type, created_at, updated_at - FROM {self._get_table_name('file_storage')} + SELECT document_id, name, oid, size, type, created_at, updated_at + FROM {self._get_table_name(PostgresFileHandler.TABLE_NAME)} """ if filter_document_ids: @@ -238,7 +244,7 @@ async def get_files_overview( params.append([str(doc_id) for doc_id in 
filter_document_ids]) if filter_file_names: - conditions.append(f"file_name = ANY(${len(params) + 1})") + conditions.append(f"name = ANY(${len(params) + 1})") params.append(filter_file_names) if conditions: @@ -258,10 +264,10 @@ async def get_files_overview( return [ { "document_id": row["document_id"], - "file_name": row["file_name"], - "file_oid": row["file_oid"], - "file_size": row["file_size"], - "file_type": row["file_type"], + "file_name": row["name"], + "file_oid": row["oid"], + "file_size": row["size"], + "file_type": row["type"], "created_at": row["created_at"], "updated_at": row["updated_at"], } diff --git a/py/core/providers/database/graph.py b/py/core/providers/database/graph.py new file mode 100644 index 000000000..323364aaf --- /dev/null +++ b/py/core/providers/database/graph.py @@ -0,0 +1,2703 @@ +import asyncio +import datetime +import json +import logging +import time +from enum import Enum +from typing import Any, AsyncGenerator, Optional, Tuple, Union +from uuid import UUID + +import asyncpg +from asyncpg.exceptions import UndefinedTableError, UniqueViolationError +from fastapi import HTTPException + +from core.base.abstractions import ( + Community, + Entity, + Graph, + KGCreationSettings, + KGEnrichmentSettings, + KGEnrichmentStatus, + KGEntityDeduplicationSettings, + KGExtractionStatus, + R2RException, + Relationship, + VectorQuantizationType, +) +from core.base.api.models import GraphResponse +from core.base.providers.database import ( + CommunityHandler, + EntityHandler, + GraphHandler, + RelationshipHandler, +) +from core.base.utils import ( + _decorate_vector_type, + _get_str_estimation_output, + llm_cost_per_million_tokens, +) + +from .base import PostgresConnectionManager +from .collection import PostgresCollectionHandler + + +class StoreType(str, Enum): + GRAPHS = "graphs" + DOCUMENTS = "documents" + + +logger = logging.getLogger() + + +class PostgresEntityHandler(EntityHandler): + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.project_name: str = kwargs.get("project_name") # type: ignore + self.connection_manager: PostgresConnectionManager = kwargs.get("connection_manager") # type: ignore + self.dimension: int = kwargs.get("dimension") # type: ignore + self.quantization_type: VectorQuantizationType = kwargs.get("quantization_type") # type: ignore + + def _get_table_name(self, table: str) -> str: + """Get the fully qualified table name.""" + return f'"{self.project_name}"."{table}"' + + def _get_entity_table_for_store(self, store_type: StoreType) -> str: + """Get the appropriate table name for the store type.""" + if isinstance(store_type, StoreType): + store_type = store_type.value + return f"{store_type}_entities" + + def _get_parent_constraint(self, store_type: StoreType) -> str: + """Get the appropriate foreign key constraint for the store type.""" + if store_type == StoreType.GRAPHS: + return f""" + CONSTRAINT fk_graph + FOREIGN KEY(parent_id) + REFERENCES {self._get_table_name("graphs")}(id) + ON DELETE CASCADE + """ + else: + return f""" + CONSTRAINT fk_document + FOREIGN KEY(parent_id) + REFERENCES {self._get_table_name("documents")}(id) + ON DELETE CASCADE + """ + + async def create_tables(self) -> None: + """Create separate tables for graph and document entities.""" + vector_column_str = _decorate_vector_type( + f"({self.dimension})", self.quantization_type + ) + + for store_type in StoreType: + table_name = self._get_entity_table_for_store(store_type) + parent_constraint = self._get_parent_constraint(store_type) + + QUERY = f""" + 
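+                -- One table per store type (graphs_entities / documents_entities); parent_id cascades from the owning graph or document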
CREATE TABLE IF NOT EXISTS {self._get_table_name(table_name)} ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + name TEXT NOT NULL, + category TEXT, + description TEXT, + parent_id UUID NOT NULL, + description_embedding {vector_column_str}, + chunk_ids UUID[], + metadata JSONB, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + {parent_constraint} + ); + CREATE INDEX IF NOT EXISTS {table_name}_name_idx + ON {self._get_table_name(table_name)} (name); + CREATE INDEX IF NOT EXISTS {table_name}_parent_id_idx + ON {self._get_table_name(table_name)} (parent_id); + CREATE INDEX IF NOT EXISTS {table_name}_category_idx + ON {self._get_table_name(table_name)} (category); + """ + await self.connection_manager.execute_query(QUERY) + + async def create( + self, + parent_id: UUID, + store_type: StoreType, + name: str, + category: Optional[str] = None, + description: Optional[str] = None, + description_embedding: Optional[list[float] | str] = None, + chunk_ids: Optional[list[UUID]] = None, + metadata: Optional[dict[str, Any] | str] = None, + ) -> Entity: + """Create a new entity in the specified store.""" + table_name = self._get_entity_table_for_store(store_type) + + if isinstance(metadata, str): + try: + metadata = json.loads(metadata) + except json.JSONDecodeError: + pass + + if isinstance(description_embedding, list): + description_embedding = str(description_embedding) + + query = f""" + INSERT INTO {self._get_table_name(table_name)} + (name, category, description, parent_id, description_embedding, chunk_ids, metadata) + VALUES ($1, $2, $3, $4, $5, $6, $7) + RETURNING id, name, category, description, parent_id, chunk_ids, metadata + """ + + params = [ + name, + category, + description, + parent_id, + description_embedding, + chunk_ids, + json.dumps(metadata) if metadata else None, + ] + + result = await self.connection_manager.fetchrow_query( + query=query, + params=params, + ) + + return Entity( + id=result["id"], + name=result["name"], + category=result["category"], + description=result["description"], + parent_id=result["parent_id"], + chunk_ids=result["chunk_ids"], + metadata=result["metadata"], + ) + + async def get( + self, + parent_id: UUID, + store_type: StoreType, + offset: int, + limit: int, + entity_ids: Optional[list[UUID]] = None, + entity_names: Optional[list[str]] = None, + include_embeddings: bool = False, + ): + """Retrieve entities from the specified store.""" + table_name = self._get_entity_table_for_store(store_type) + + conditions = ["parent_id = $1"] + params: list[Any] = [parent_id] + param_index = 2 + + if entity_ids: + conditions.append(f"id = ANY(${param_index})") + params.append(entity_ids) + param_index += 1 + + if entity_names: + conditions.append(f"name = ANY(${param_index})") + params.append(entity_names) + param_index += 1 + + select_fields = """ + id, name, category, description, parent_id, + chunk_ids, metadata + """ + if include_embeddings: + select_fields += ", description_embedding" + + COUNT_QUERY = f""" + SELECT COUNT(*) + FROM {self._get_table_name(table_name)} + WHERE {' AND '.join(conditions)} + """ + + count_params = params[: param_index - 1] + count = ( + await self.connection_manager.fetch_query( + COUNT_QUERY, count_params + ) + )[0]["count"] + + QUERY = f""" + SELECT {select_fields} + FROM {self._get_table_name(table_name)} + WHERE {' AND '.join(conditions)} + ORDER BY created_at + OFFSET ${param_index} + """ + params.append(offset) + param_index += 1 + + if limit != -1: + QUERY += f" LIMIT ${param_index}" + 
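+            # limit == -1 means "no limit"; otherwise the page size is bound as the final query parameter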
params.append(limit) + + rows = await self.connection_manager.fetch_query(QUERY, params) + + entities = [] + for row in rows: + # Convert the Record to a dictionary + entity_dict = dict(row) + + # Process metadata if it exists and is a string + if isinstance(entity_dict["metadata"], str): + try: + entity_dict["metadata"] = json.loads( + entity_dict["metadata"] + ) + except json.JSONDecodeError: + pass + + entities.append(Entity(**entity_dict)) + + return entities, count + + async def update( + self, + entity_id: UUID, + store_type: StoreType, + name: Optional[str] = None, + description: Optional[str] = None, + description_embedding: Optional[list[float] | str] = None, + category: Optional[str] = None, + metadata: Optional[dict] = None, + ) -> Entity: + """Update an entity in the specified store.""" + table_name = self._get_entity_table_for_store(store_type) + update_fields = [] + params: list[Any] = [] + param_index = 1 + + if isinstance(metadata, str): + try: + metadata = json.loads(metadata) + except json.JSONDecodeError: + pass + + if name is not None: + update_fields.append(f"name = ${param_index}") + params.append(name) + param_index += 1 + + if description is not None: + update_fields.append(f"description = ${param_index}") + params.append(description) + param_index += 1 + + if description_embedding is not None: + update_fields.append(f"description_embedding = ${param_index}") + params.append(description_embedding) + param_index += 1 + + if category is not None: + update_fields.append(f"category = ${param_index}") + params.append(category) + param_index += 1 + + if metadata is not None: + update_fields.append(f"metadata = ${param_index}") + params.append(json.dumps(metadata)) + param_index += 1 + + if not update_fields: + raise R2RException(status_code=400, message="No fields to update") + + update_fields.append("updated_at = NOW()") + params.append(entity_id) + + query = f""" + UPDATE {self._get_table_name(table_name)} + SET {', '.join(update_fields)} + WHERE id = ${param_index}\ + RETURNING id, name, category, description, parent_id, chunk_ids, metadata + """ + try: + result = await self.connection_manager.fetchrow_query( + query=query, + params=params, + ) + + return Entity( + id=result["id"], + name=result["name"], + category=result["category"], + description=result["description"], + parent_id=result["parent_id"], + chunk_ids=result["chunk_ids"], + metadata=result["metadata"], + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An error occurred while updating the entity: {e}", + ) + + async def delete( + self, + parent_id: UUID, + entity_ids: Optional[list[UUID]] = None, + store_type: StoreType = StoreType.GRAPHS, + ) -> None: + """ + Delete entities from the specified store. + If entity_ids is not provided, deletes all entities for the given parent_id. + + Args: + parent_id (UUID): Parent ID (collection_id or document_id) + entity_ids (Optional[list[UUID]]): Specific entity IDs to delete. 
If None, deletes all entities for parent_id + store_type (StoreType): Type of store (graph or document) + + Returns: + list[UUID]: List of deleted entity IDs + + Raises: + R2RException: If specific entities were requested but not all found + """ + table_name = self._get_entity_table_for_store(store_type) + + if entity_ids is None: + # Delete all entities for the parent_id + QUERY = f""" + DELETE FROM {self._get_table_name(table_name)} + WHERE parent_id = $1 + RETURNING id + """ + results = await self.connection_manager.fetch_query( + QUERY, [parent_id] + ) + else: + # Delete specific entities + QUERY = f""" + DELETE FROM {self._get_table_name(table_name)} + WHERE id = ANY($1) AND parent_id = $2 + RETURNING id + """ + + results = await self.connection_manager.fetch_query( + QUERY, [entity_ids, parent_id] + ) + + # Check if all requested entities were deleted + deleted_ids = [row["id"] for row in results] + if entity_ids and len(deleted_ids) != len(entity_ids): + raise R2RException( + f"Some entities not found in {store_type} store or no permission to delete", + 404, + ) + + +class PostgresRelationshipHandler(RelationshipHandler): + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.project_name: str = kwargs.get("project_name") # type: ignore + self.connection_manager: PostgresConnectionManager = kwargs.get("connection_manager") # type: ignore + self.dimension: int = kwargs.get("dimension") # type: ignore + self.quantization_type: VectorQuantizationType = kwargs.get("quantization_type") # type: ignore + + def _get_table_name(self, table: str) -> str: + """Get the fully qualified table name.""" + return f'"{self.project_name}"."{table}"' + + def _get_relationship_table_for_store(self, store_type: StoreType) -> str: + """Get the appropriate table name for the store type.""" + if isinstance(store_type, StoreType): + store_type = store_type.value + return f"{store_type}_relationships" + + def _get_parent_constraint(self, store_type: StoreType) -> str: + """Get the appropriate foreign key constraint for the store type.""" + if store_type == StoreType.GRAPHS: + return f""" + CONSTRAINT fk_graph + FOREIGN KEY(parent_id) + REFERENCES {self._get_table_name("graphs")}(id) + ON DELETE CASCADE + """ + else: + return f""" + CONSTRAINT fk_document + FOREIGN KEY(parent_id) + REFERENCES {self._get_table_name("documents")}(id) + ON DELETE CASCADE + """ + + async def create_tables(self) -> None: + """Create separate tables for graph and document relationships.""" + for store_type in StoreType: + table_name = self._get_relationship_table_for_store(store_type) + parent_constraint = self._get_parent_constraint(store_type) + vector_column_str = _decorate_vector_type( + f"({self.dimension})", self.quantization_type + ) + QUERY = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name(table_name)} ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + subject TEXT NOT NULL, + predicate TEXT NOT NULL, + object TEXT NOT NULL, + description TEXT, + description_embedding {vector_column_str}, + subject_id UUID, + object_id UUID, + weight FLOAT DEFAULT 1.0, + chunk_ids UUID[], + parent_id UUID NOT NULL, + metadata JSONB, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW(), + {parent_constraint} + ); + + CREATE INDEX IF NOT EXISTS {table_name}_subject_idx + ON {self._get_table_name(table_name)} (subject); + CREATE INDEX IF NOT EXISTS {table_name}_object_idx + ON {self._get_table_name(table_name)} (object); + CREATE INDEX IF NOT EXISTS {table_name}_predicate_idx + ON 
{self._get_table_name(table_name)} (predicate); + CREATE INDEX IF NOT EXISTS {table_name}_parent_id_idx + ON {self._get_table_name(table_name)} (parent_id); + CREATE INDEX IF NOT EXISTS {table_name}_subject_id_idx + ON {self._get_table_name(table_name)} (subject_id); + CREATE INDEX IF NOT EXISTS {table_name}_object_id_idx + ON {self._get_table_name(table_name)} (object_id); + """ + await self.connection_manager.execute_query(QUERY) + + async def create( + self, + subject: str, + subject_id: UUID, + predicate: str, + object: str, + object_id: UUID, + parent_id: UUID, + store_type: StoreType, + description: str | None = None, + weight: float | None = 1.0, + chunk_ids: Optional[list[UUID]] = None, + description_embedding: Optional[list[float] | str] = None, + metadata: Optional[dict[str, Any] | str] = None, + ) -> Relationship: + """Create a new relationship in the specified store.""" + table_name = self._get_relationship_table_for_store(store_type) + + if isinstance(metadata, str): + try: + metadata = json.loads(metadata) + except json.JSONDecodeError: + pass + + if isinstance(description_embedding, list): + description_embedding = str(description_embedding) + + query = f""" + INSERT INTO {self._get_table_name(table_name)} + (subject, predicate, object, description, subject_id, object_id, + weight, chunk_ids, parent_id, description_embedding, metadata) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11) + RETURNING id, subject, predicate, object, description, subject_id, object_id, weight, chunk_ids, parent_id, metadata + """ + + params = [ + subject, + predicate, + object, + description, + subject_id, + object_id, + weight, + chunk_ids, + parent_id, + description_embedding, + json.dumps(metadata) if metadata else None, + ] + + result = await self.connection_manager.fetchrow_query( + query=query, + params=params, + ) + + return Relationship( + id=result["id"], + subject=result["subject"], + predicate=result["predicate"], + object=result["object"], + description=result["description"], + subject_id=result["subject_id"], + object_id=result["object_id"], + weight=result["weight"], + chunk_ids=result["chunk_ids"], + parent_id=result["parent_id"], + metadata=result["metadata"], + ) + + async def get( + self, + parent_id: UUID, + store_type: StoreType, + offset: int, + limit: int, + relationship_ids: Optional[list[UUID]] = None, + entity_names: Optional[list[str]] = None, + relationship_types: Optional[list[str]] = None, + include_metadata: bool = False, + ): + """ + Get relationships from the specified store. 
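+        Entity name filters match either side of the relationship (subject or object).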
+ + Args: + parent_id: UUID of the parent (collection_id or document_id) + store_type: Type of store (graph or document) + offset: Number of records to skip + limit: Maximum number of records to return (-1 for no limit) + relationship_ids: Optional list of specific relationship IDs to retrieve + entity_names: Optional list of entity names to filter by (matches subject or object) + relationship_types: Optional list of relationship types (predicates) to filter by + include_metadata: Whether to include metadata in the response + + Returns: + Tuple of (list of relationships, total count) + """ + table_name = self._get_relationship_table_for_store(store_type) + + conditions = ["parent_id = $1"] + params: list[Any] = [parent_id] + param_index = 2 + + if relationship_ids: + conditions.append(f"id = ANY(${param_index})") + params.append(relationship_ids) + param_index += 1 + + if entity_names: + conditions.append( + f"(subject = ANY(${param_index}) OR object = ANY(${param_index}))" + ) + params.append(entity_names) + param_index += 1 + + if relationship_types: + conditions.append(f"predicate = ANY(${param_index})") + params.append(relationship_types) + param_index += 1 + + select_fields = """ + id, subject, predicate, object, description, + subject_id, object_id, weight, chunk_ids, + parent_id + """ + if include_metadata: + select_fields += ", metadata" + + # Count query + COUNT_QUERY = f""" + SELECT COUNT(*) + FROM {self._get_table_name(table_name)} + WHERE {' AND '.join(conditions)} + """ + count_params = params[: param_index - 1] + count = ( + await self.connection_manager.fetch_query( + COUNT_QUERY, count_params + ) + )[0]["count"] + + # Main query + QUERY = f""" + SELECT {select_fields} + FROM {self._get_table_name(table_name)} + WHERE {' AND '.join(conditions)} + ORDER BY created_at + OFFSET ${param_index} + """ + params.append(offset) + param_index += 1 + + if limit != -1: + QUERY += f" LIMIT ${param_index}" + params.append(limit) + + rows = await self.connection_manager.fetch_query(QUERY, params) + + relationships = [] + for row in rows: + relationship_dict = dict(row) + if include_metadata and isinstance( + relationship_dict["metadata"], str + ): + try: + relationship_dict["metadata"] = json.loads( + relationship_dict["metadata"] + ) + except json.JSONDecodeError: + pass + elif not include_metadata: + relationship_dict.pop("metadata", None) + relationships.append(Relationship(**relationship_dict)) + + return relationships, count + + async def update( + self, + relationship_id: UUID, + store_type: StoreType, + subject: Optional[str], + subject_id: Optional[UUID], + predicate: Optional[str], + object: Optional[str], + object_id: Optional[UUID], + description: Optional[str], + description_embedding: Optional[list[float] | str], + weight: Optional[float], + metadata: Optional[dict[str, Any] | str], + ) -> Relationship: + """Update multiple relationships in the specified store.""" + table_name = self._get_relationship_table_for_store(store_type) + update_fields = [] + params: list = [] + param_index = 1 + + if isinstance(metadata, str): + try: + metadata = json.loads(metadata) + except json.JSONDecodeError: + pass + + if subject is not None: + update_fields.append(f"subject = ${param_index}") + params.append(subject) + param_index += 1 + + if subject_id is not None: + update_fields.append(f"subject_id = ${param_index}") + params.append(subject_id) + param_index += 1 + + if predicate is not None: + update_fields.append(f"predicate = ${param_index}") + params.append(predicate) + param_index += 1 
+ + if object is not None: + update_fields.append(f"object = ${param_index}") + params.append(object) + param_index += 1 + + if object_id is not None: + update_fields.append(f"object_id = ${param_index}") + params.append(object_id) + param_index += 1 + + if description is not None: + update_fields.append(f"description = ${param_index}") + params.append(description) + param_index += 1 + + if description_embedding is not None: + update_fields.append(f"description_embedding = ${param_index}") + params.append(description_embedding) + param_index += 1 + + if weight is not None: + update_fields.append(f"weight = ${param_index}") + params.append(weight) + param_index += 1 + + if not update_fields: + raise R2RException(status_code=400, message="No fields to update") + + update_fields.append("updated_at = NOW()") + params.append(relationship_id) + + query = f""" + UPDATE {self._get_table_name(table_name)} + SET {', '.join(update_fields)} + WHERE id = ${param_index} + RETURNING id, subject, predicate, object, description, subject_id, object_id, weight, chunk_ids, parent_id, metadata + """ + + try: + result = await self.connection_manager.fetchrow_query( + query=query, + params=params, + ) + + return Relationship( + id=result["id"], + subject=result["subject"], + predicate=result["predicate"], + object=result["object"], + description=result["description"], + subject_id=result["subject_id"], + object_id=result["object_id"], + weight=result["weight"], + chunk_ids=result["chunk_ids"], + parent_id=result["parent_id"], + metadata=result["metadata"], + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An error occurred while updating the relationship: {e}", + ) + + async def delete( + self, + parent_id: UUID, + relationship_ids: Optional[list[UUID]] = None, + store_type: StoreType = StoreType.GRAPHS, + ) -> None: + """ + Delete relationships from the specified store. + If relationship_ids is not provided, deletes all relationships for the given parent_id. 
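+ Deletions are always scoped to parent_id, so IDs that belong to a different parent are treated as not found.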
+ + Args: + parent_id: UUID of the parent (collection_id or document_id) + relationship_ids: Optional list of specific relationship IDs to delete + store_type: Type of store (graph or document) + + Returns: + List of deleted relationship IDs + + Raises: + R2RException: If specific relationships were requested but not all found + """ + table_name = self._get_relationship_table_for_store(store_type) + + if relationship_ids is None: + QUERY = f""" + DELETE FROM {self._get_table_name(table_name)} + WHERE parent_id = $1 + RETURNING id + """ + results = await self.connection_manager.fetch_query( + QUERY, [parent_id] + ) + else: + QUERY = f""" + DELETE FROM {self._get_table_name(table_name)} + WHERE id = ANY($1) AND parent_id = $2 + RETURNING id + """ + results = await self.connection_manager.fetch_query( + QUERY, [relationship_ids, parent_id] + ) + + deleted_ids = [row["id"] for row in results] + if relationship_ids and len(deleted_ids) != len(relationship_ids): + raise R2RException( + f"Some relationships not found in {store_type} store or no permission to delete", + 404, + ) + + +class PostgresCommunityHandler(CommunityHandler): + + def __init__(self, *args: Any, **kwargs: Any) -> None: + self.project_name: str = kwargs.get("project_name") # type: ignore + self.connection_manager: PostgresConnectionManager = kwargs.get("connection_manager") # type: ignore + self.dimension: int = kwargs.get("dimension") # type: ignore + self.quantization_type: VectorQuantizationType = kwargs.get("quantization_type") # type: ignore + + async def create_tables(self) -> None: + + vector_column_str = _decorate_vector_type( + f"({self.dimension})", self.quantization_type + ) + + query = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name("graphs_communities")} ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + collection_id UUID, + community_id UUID, + level INT, + name TEXT NOT NULL, + summary TEXT NOT NULL, + findings TEXT[], + rating FLOAT, + rating_explanation TEXT, + description_embedding {vector_column_str} NOT NULL, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + metadata JSONB, + UNIQUE (community_id, level, collection_id) + );""" + + await self.connection_manager.execute_query(query) + + async def create( + self, + parent_id: UUID, + store_type: StoreType, + name: str, + summary: str, + findings: Optional[list[str]], + rating: Optional[float], + rating_explanation: Optional[str], + description_embedding: Optional[list[float] | str] = None, + ) -> Community: + # Do we ever want to get communities from document store? 
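+ # Communities are only persisted at the graph (collection) level, so writes always target graphs_communities regardless of store_type.
+ # The embedding list is stringified so it can be passed to the vector column as a literal.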
+ table_name = "graphs_communities" + + if isinstance(description_embedding, list): + description_embedding = str(description_embedding) + + query = f""" + INSERT INTO {self._get_table_name(table_name)} + (collection_id, name, summary, findings, rating, rating_explanation, description_embedding) + VALUES ($1, $2, $3, $4, $5, $6, $7) + RETURNING id, collection_id, name, summary, findings, rating, rating_explanation, created_at, updated_at + """ + + params = [ + parent_id, + name, + summary, + findings, + rating, + rating_explanation, + description_embedding, + ] + + try: + result = await self.connection_manager.fetchrow_query( + query=query, + params=params, + ) + + return Community( + id=result["id"], + collection_id=result["collection_id"], + name=result["name"], + summary=result["summary"], + findings=result["findings"], + rating=result["rating"], + rating_explanation=result["rating_explanation"], + created_at=result["created_at"], + updated_at=result["updated_at"], + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An error occurred while creating the community: {e}", + ) + + async def update( + self, + community_id: UUID, + store_type: StoreType, + name: Optional[str] = None, + summary: Optional[str] = None, + summary_embedding: Optional[list[float] | str] = None, + findings: Optional[list[str]] = None, + rating: Optional[float] = None, + rating_explanation: Optional[str] = None, + ) -> Community: + table_name = "graphs_communities" + update_fields = [] + params: list[Any] = [] + param_index = 1 + + if name is not None: + update_fields.append(f"name = ${param_index}") + params.append(name) + param_index += 1 + + if summary is not None: + update_fields.append(f"summary = ${param_index}") + params.append(summary) + param_index += 1 + + if summary_embedding is not None: + update_fields.append(f"description_embedding = ${param_index}") + params.append(summary_embedding) + param_index += 1 + + if findings is not None: + update_fields.append(f"findings = ${param_index}") + params.append(findings) + param_index += 1 + + if rating is not None: + update_fields.append(f"rating = ${param_index}") + params.append(rating) + param_index += 1 + + if rating_explanation is not None: + update_fields.append(f"rating_explanation = ${param_index}") + params.append(rating_explanation) + param_index += 1 + + if not update_fields: + raise R2RException(status_code=400, message="No fields to update") + + update_fields.append("updated_at = NOW()") + params.append(community_id) + + query = f""" + UPDATE {self._get_table_name(table_name)} + SET {", ".join(update_fields)} + WHERE id = ${param_index}\ + RETURNING id, community_id, name, summary, findings, rating, rating_explanation, created_at, updated_at + """ + try: + result = await self.connection_manager.fetchrow_query( + query, params + ) + + return Community( + id=result["id"], + community_id=result["community_id"], + name=result["name"], + summary=result["summary"], + findings=result["findings"], + rating=result["rating"], + rating_explanation=result["rating_explanation"], + created_at=result["created_at"], + updated_at=result["updated_at"], + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An error occurred while updating the community: {e}", + ) + + async def delete( + self, + parent_id: UUID, + community_id: UUID, + ) -> None: + table_name = "graphs_communities" + + query = f""" + DELETE FROM {self._get_table_name(table_name)} + WHERE id = $1 AND collection_id = $2 + """ + + params = 
[community_id, parent_id] + + try: + results = await self.connection_manager.execute_query( + query, params + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An error occurred while deleting the community: {e}", + ) + + params = [ + community_id, + parent_id, + ] + + try: + results = await self.connection_manager.execute_query( + query, params + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An error occurred while deleting the community: {e}", + ) + + async def get( + self, + parent_id: UUID, + store_type: StoreType, + offset: int, + limit: int, + community_ids: Optional[list[UUID]] = None, + community_names: Optional[list[str]] = None, + include_embeddings: bool = False, + ): + """Retrieve communities from the specified store.""" + # Do we ever want to get communities from document store? + table_name = "graphs_communities" + + conditions = ["collection_id = $1"] + params: list[Any] = [parent_id] + param_index = 2 + + if community_ids: + conditions.append(f"id = ANY(${param_index})") + params.append(community_ids) + param_index += 1 + + if community_names: + conditions.append(f"name = ANY(${param_index})") + params.append(community_names) + param_index += 1 + + select_fields = """ + id, community_id, name, summary, findings, rating, + rating_explanation, level, created_at, updated_at + """ + if include_embeddings: + select_fields += ", description_embedding" + + COUNT_QUERY = f""" + SELECT COUNT(*) + FROM {self._get_table_name(table_name)} + WHERE {' AND '.join(conditions)} + """ + + count = ( + await self.connection_manager.fetch_query( + COUNT_QUERY, params[: param_index - 1] + ) + )[0]["count"] + + QUERY = f""" + SELECT {select_fields} + FROM {self._get_table_name(table_name)} + WHERE {' AND '.join(conditions)} + ORDER BY created_at + OFFSET ${param_index} + """ + params.append(offset) + param_index += 1 + + if limit != -1: + QUERY += f" LIMIT ${param_index}" + params.append(limit) + + rows = await self.connection_manager.fetch_query(QUERY, params) + + communities = [] + for row in rows: + community_dict = dict(row) + + communities.append(Community(**community_dict)) + + return communities, count + + +class PostgresGraphHandler(GraphHandler): + """Handler for Knowledge Graph METHODS in PostgreSQL.""" + + TABLE_NAME = "graphs" + + def __init__( + self, + *args: Any, + **kwargs: Any, + ) -> None: + + self.project_name: str = kwargs.get("project_name") # type: ignore + self.connection_manager: PostgresConnectionManager = kwargs.get("connection_manager") # type: ignore + self.dimension: int = kwargs.get("dimension") # type: ignore + self.quantization_type: VectorQuantizationType = kwargs.get("quantization_type") # type: ignore + self.collections_handler: PostgresCollectionHandler = kwargs.get("collections_handler") # type: ignore + + self.entities = PostgresEntityHandler(*args, **kwargs) + self.relationships = PostgresRelationshipHandler(*args, **kwargs) + self.communities = PostgresCommunityHandler(*args, **kwargs) + + self.handlers = [ + self.entities, + self.relationships, + self.communities, + ] + + import networkx as nx + + self.nx = nx + + async def create_tables(self) -> None: + """Create the graph tables with mandatory collection_id support.""" + QUERY = f""" + CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresGraphHandler.TABLE_NAME)} ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + collection_id UUID NOT NULL, + name TEXT NOT NULL, + description TEXT, + status TEXT NOT NULL, + document_ids UUID[], + 
metadata JSONB, + created_at TIMESTAMPTZ DEFAULT NOW(), + updated_at TIMESTAMPTZ DEFAULT NOW() + ); + + CREATE INDEX IF NOT EXISTS graph_collection_id_idx + ON {self._get_table_name("graphs")} (collection_id); + """ + + await self.connection_manager.execute_query(QUERY) + + for handler in self.handlers: + await handler.create_tables() + + async def create( + self, + collection_id: UUID, + name: Optional[str] = None, + description: Optional[str] = None, + status: str = "pending", + ) -> GraphResponse: + """Create a new graph associated with a collection.""" + + name = name or f"Graph {collection_id}" + description = description or "" + + query = f""" + INSERT INTO {self._get_table_name(PostgresGraphHandler.TABLE_NAME)} + (id, collection_id, name, description, status) + VALUES ($1, $2, $3, $4, $5) + RETURNING id, collection_id, name, description, status, created_at, updated_at, document_ids + """ + params = [ + collection_id, + collection_id, + name, + description, + status, + ] + + try: + result = await self.connection_manager.fetchrow_query( + query=query, + params=params, + ) + + return GraphResponse( + id=result["id"], + collection_id=result["collection_id"], + name=result["name"], + description=result["description"], + status=result["status"], + created_at=result["created_at"], + updated_at=result["updated_at"], + document_ids=result["document_ids"] or [], + ) + except UniqueViolationError: + raise R2RException( + message="Graph with this ID already exists", + status_code=409, + ) + + async def reset(self, parent_id: UUID) -> None: + """ + Completely reset a graph and all associated data. + """ + try: + entity_delete_query = f""" + DELETE FROM {self._get_table_name("graphs_entities")} + WHERE parent_id = $1 + """ + await self.connection_manager.execute_query( + entity_delete_query, [parent_id] + ) + + # Delete all graph relationships + relationship_delete_query = f""" + DELETE FROM {self._get_table_name("graphs_relationships")} + WHERE parent_id = $1 + """ + await self.connection_manager.execute_query( + relationship_delete_query, [parent_id] + ) + + # Delete all graph relationships + community_delete_query = f""" + DELETE FROM {self._get_table_name("graphs_communities")} + WHERE collection_id = $1 + """ + await self.connection_manager.execute_query( + community_delete_query, [parent_id] + ) + + # Delete all graph communities and community info + query = f""" + DELETE FROM {self._get_table_name("graphs_communities")} + WHERE collection_id = $1 + """ + + await self.connection_manager.execute_query(query, [parent_id]) + + except Exception as e: + logger.error(f"Error deleting graph {parent_id}: {str(e)}") + raise R2RException(f"Failed to delete graph: {str(e)}", 500) + + async def list_graphs( + self, + offset: int, + limit: int, + # filter_user_ids: Optional[list[UUID]] = None, + filter_graph_ids: Optional[list[UUID]] = None, + filter_collection_id: Optional[UUID] = None, + ) -> dict[str, list[GraphResponse] | int]: + conditions = [] + params: list[Any] = [] + param_index = 1 + + if filter_graph_ids: + conditions.append(f"id = ANY(${param_index})") + params.append(filter_graph_ids) + param_index += 1 + + # if filter_user_ids: + # conditions.append(f"user_id = ANY(${param_index})") + # params.append(filter_user_ids) + # param_index += 1 + + if filter_collection_id: + conditions.append(f"collection_id = ${param_index}") + params.append(filter_collection_id) + param_index += 1 + + where_clause = ( + f"WHERE {' AND '.join(conditions)}" if conditions else "" + ) + + query = f""" + WITH 
RankedGraphs AS ( + SELECT + id, collection_id, name, description, status, created_at, updated_at, document_ids, + COUNT(*) OVER() as total_entries, + ROW_NUMBER() OVER (PARTITION BY collection_id ORDER BY created_at DESC) as rn + FROM {self._get_table_name(PostgresGraphHandler.TABLE_NAME)} + {where_clause} + ) + SELECT * FROM RankedGraphs + WHERE rn = 1 + ORDER BY created_at DESC + OFFSET ${param_index} LIMIT ${param_index + 1} + """ + + params.extend([offset, limit]) + + try: + results = await self.connection_manager.fetch_query(query, params) + if not results: + return {"results": [], "total_entries": 0} + + total_entries = results[0]["total_entries"] if results else 0 + + graphs = [ + GraphResponse( + id=row["id"], + document_ids=row["document_ids"] or [], + name=row["name"], + collection_id=row["collection_id"], + description=row["description"], + status=row["status"], + created_at=row["created_at"], + updated_at=row["updated_at"], + ) + for row in results + ] + + return {"results": graphs, "total_entries": total_entries} + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An error occurred while fetching graphs: {e}", + ) + + async def get( + self, offset: int, limit: int, graph_id: Optional[UUID] = None + ): + + if graph_id is None: + + params = [offset, limit] + + QUERY = f""" + SELECT * FROM {self._get_table_name(PostgresGraphHandler.TABLE_NAME)} + OFFSET $1 LIMIT $2 + """ + + ret = await self.connection_manager.fetch_query(QUERY, params) + + COUNT_QUERY = f""" + SELECT COUNT(*) FROM {self._get_table_name(PostgresGraphHandler.TABLE_NAME)} + """ + count = (await self.connection_manager.fetch_query(COUNT_QUERY))[ + 0 + ]["count"] + + return { + "results": [Graph(**row) for row in ret], + "total_entries": count, + } + + else: + QUERY = f""" + SELECT * FROM {self._get_table_name(PostgresGraphHandler.TABLE_NAME)} WHERE id = $1 + """ + + params = [graph_id] # type: ignore + + return { + "results": [ + Graph( + **await self.connection_manager.fetchrow_query( + QUERY, params + ) + ) + ] + } + + async def add_documents(self, id: UUID, document_ids: list[UUID]) -> bool: + """ + Add documents to the graph by copying their entities and relationships. 
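+ Entities and relationships are bulk-copied from the document-level tables into the graph tables, and the given document IDs are appended to the graph's document_ids array.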
+ """ + # Copy entities from document_entity to graphs_entities + ENTITY_COPY_QUERY = f""" + INSERT INTO {self._get_table_name("graphs_entities")} ( + name, category, description, parent_id, description_embedding, + chunk_ids, metadata + ) + SELECT + name, category, description, $1, description_embedding, + chunk_ids, metadata + FROM {self._get_table_name("documents_entities")} + WHERE parent_id = ANY($2) + """ + await self.connection_manager.execute_query( + ENTITY_COPY_QUERY, [id, document_ids] + ) + + # Copy relationships from documents_relationships to graphs_relationships + RELATIONSHIP_COPY_QUERY = f""" + INSERT INTO {self._get_table_name("graphs_relationships")} ( + subject, predicate, object, description, subject_id, object_id, + weight, chunk_ids, parent_id, metadata, description_embedding + ) + SELECT + subject, predicate, object, description, subject_id, object_id, + weight, chunk_ids, $1, metadata, description_embedding + FROM {self._get_table_name("documents_relationships")} + WHERE parent_id = ANY($2) + """ + await self.connection_manager.execute_query( + RELATIONSHIP_COPY_QUERY, [id, document_ids] + ) + + # Add document_ids to the graph + UPDATE_GRAPH_QUERY = f""" + UPDATE {self._get_table_name(PostgresGraphHandler.TABLE_NAME)} + SET document_ids = array_cat( + CASE + WHEN document_ids IS NULL THEN ARRAY[]::uuid[] + ELSE document_ids + END, + $2::uuid[] + ) + WHERE id = $1 + """ + await self.connection_manager.execute_query( + UPDATE_GRAPH_QUERY, [id, document_ids] + ) + + return True + + async def update( + self, + collection_id: UUID, + name: Optional[str] = None, + description: Optional[str] = None, + ) -> GraphResponse: + """Update an existing graph.""" + update_fields = [] + params: list = [] + param_index = 1 + + if name is not None: + update_fields.append(f"name = ${param_index}") + params.append(name) + param_index += 1 + + if description is not None: + update_fields.append(f"description = ${param_index}") + params.append(description) + param_index += 1 + + if not update_fields: + raise R2RException(status_code=400, message="No fields to update") + + update_fields.append("updated_at = NOW()") + params.append(collection_id) + + query = f""" + UPDATE {self._get_table_name(PostgresGraphHandler.TABLE_NAME)} + SET {', '.join(update_fields)} + WHERE id = ${param_index} + RETURNING id, name, description, status, created_at, updated_at, collection_id, document_ids + """ + + try: + result = await self.connection_manager.fetchrow_query( + query, params + ) + + if not result: + raise R2RException(status_code=404, message="Graph not found") + + return GraphResponse( + id=result["id"], + collection_id=result["collection_id"], + name=result["name"], + description=result["description"], + status=result["status"], + created_at=result["created_at"], + document_ids=result["document_ids"] or [], + updated_at=result["updated_at"], + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"An error occurred while updating the graph: {e}", + ) + + async def get_creation_estimate( + self, + graph_creation_settings: KGCreationSettings, + document_id: Optional[UUID] = None, + collection_id: Optional[UUID] = None, + ): + """Get the estimated cost and time for creating a KG.""" + + if bool(document_id) ^ bool(collection_id) is False: + raise ValueError( + "Exactly one of document_id or collection_id must be provided." + ) + + # todo: harmonize the document_id and id fields: postgres table contains document_id, but other places use id. 
+ + document_ids = ( + [document_id] + if document_id + else [ + doc.id for doc in (await self.collections_handler.documents_in_collection(collection_id, offset=0, limit=-1))["results"] # type: ignore + ] + ) + + chunk_counts = await self.connection_manager.fetch_query( + f"SELECT document_id, COUNT(*) as chunk_count FROM {self._get_table_name('vectors')} " + f"WHERE document_id = ANY($1) GROUP BY document_id", + [document_ids], + ) + + total_chunks = ( + sum(doc["chunk_count"] for doc in chunk_counts) + // graph_creation_settings.chunk_merge_count + ) + estimated_entities = (total_chunks * 10, total_chunks * 20) + estimated_relationships = ( + int(estimated_entities[0] * 1.25), + int(estimated_entities[1] * 1.5), + ) + estimated_llm_calls = ( + total_chunks * 2 + estimated_entities[0], + total_chunks * 2 + estimated_entities[1], + ) + total_in_out_tokens = tuple( + 2000 * calls // 1000000 for calls in estimated_llm_calls + ) + cost_per_million = llm_cost_per_million_tokens( + graph_creation_settings.generation_config.model + ) + estimated_cost = tuple( + tokens * cost_per_million for tokens in total_in_out_tokens + ) + total_time_in_minutes = tuple( + tokens * 10 / 60 for tokens in total_in_out_tokens + ) + + return { + "message": 'Ran Graph Creation Estimate (not the actual run). Note that these are estimated ranges, actual values may vary. To run the KG creation process, run `extract-triples` with `--run` in the cli, or `run_type="run"` in the client.', + "document_count": len(document_ids), + "number_of_jobs_created": len(document_ids) + 1, + "total_chunks": total_chunks, + "estimated_entities": _get_str_estimation_output( + estimated_entities + ), + "estimated_relationships": _get_str_estimation_output( + estimated_relationships + ), + "estimated_llm_calls": _get_str_estimation_output( + estimated_llm_calls + ), + "estimated_total_in_out_tokens_in_millions": _get_str_estimation_output( + total_in_out_tokens + ), + "estimated_cost_in_usd": _get_str_estimation_output( + estimated_cost + ), + "estimated_total_time_in_minutes": "Depends on your API key tier. Accurate estimate coming soon. Rough estimate: " + + _get_str_estimation_output(total_time_in_minutes), + } + + async def get_enrichment_estimate( + self, + collection_id: UUID | None = None, + graph_id: UUID | None = None, + graph_enrichment_settings: KGEnrichmentSettings = KGEnrichmentSettings(), + ): + """Get the estimated cost and time for enriching a KG.""" + if collection_id is not None: + + document_ids = [ + doc.id + for doc in ( + await self.collections_handler.documents_in_collection(collection_id, offset=0, limit=-1) # type: ignore + )["results"] + ] + + # Get entity and relationship counts + entity_count = ( + await self.connection_manager.fetch_query( + f"SELECT COUNT(*) FROM {self._get_table_name('entity')} WHERE document_id = ANY($1);", + [document_ids], + ) + )[0]["count"] + + if not entity_count: + raise ValueError( + "No entities found in the graph. Please run `extract-triples` first." + ) + + relationship_count = ( + await self.connection_manager.fetch_query( + f"""SELECT COUNT(*) FROM {self._get_table_name("documents_relationships")} WHERE document_id = ANY($1);""", + [document_ids], + ) + )[0]["count"] + + else: + entity_count = ( + await self.connection_manager.fetch_query( + f"SELECT COUNT(*) FROM {self._get_table_name('entity')} WHERE $1 = ANY(graph_ids);", + [graph_id], + ) + )[0]["count"] + + if not entity_count: + raise ValueError( + "No entities found in the graph. Please run `extract-triples` first." 
+ ) + + relationship_count = ( + await self.connection_manager.fetch_query( + f"SELECT COUNT(*) FROM {self._get_table_name('relationship')} WHERE $1 = ANY(graph_ids);", + [graph_id], + ) + )[0]["count"] + + # Calculate estimates + estimated_llm_calls = (entity_count // 10, entity_count // 5) + tokens_in_millions = tuple( + 2000 * calls / 1000000 for calls in estimated_llm_calls + ) + cost_per_million = llm_cost_per_million_tokens( + graph_enrichment_settings.generation_config.model # type: ignore + ) + estimated_cost = tuple( + tokens * cost_per_million for tokens in tokens_in_millions + ) + estimated_time = tuple( + tokens * 10 / 60 for tokens in tokens_in_millions + ) + + return { + "message": 'Ran Graph Enrichment Estimate (not the actual run). Note that these are estimated ranges, actual values may vary. To run the KG enrichment process, run `build-communities` with `--run` in the cli, or `run_type="run"` in the client.', + "total_entities": entity_count, + "total_relationships": relationship_count, + "estimated_llm_calls": _get_str_estimation_output( + estimated_llm_calls + ), + "estimated_total_in_out_tokens_in_millions": _get_str_estimation_output( + tokens_in_millions + ), + "estimated_cost_in_usd": _get_str_estimation_output( + estimated_cost + ), + "estimated_total_time_in_minutes": "Depends on your API key tier. Accurate estimate coming soon. Rough estimate: " + + _get_str_estimation_output(estimated_time), + } + + async def get_deduplication_estimate( + self, + collection_id: UUID, + kg_deduplication_settings: KGEntityDeduplicationSettings, + ): + """Get the estimated cost and time for deduplicating entities in a KG.""" + try: + query = f""" + SELECT name, count(name) + FROM {self._get_table_name("entity")} + WHERE document_id = ANY( + SELECT document_id FROM {self._get_table_name("documents")} + WHERE $1 = ANY(collection_ids) + ) + GROUP BY name + HAVING count(name) >= 5 + """ + entities = await self.connection_manager.fetch_query( + query, [collection_id] + ) + num_entities = len(entities) + + estimated_llm_calls = (num_entities, num_entities) + tokens_in_millions = ( + estimated_llm_calls[0] * 1000 / 1000000, + estimated_llm_calls[1] * 5000 / 1000000, + ) + cost_per_million = llm_cost_per_million_tokens( + kg_deduplication_settings.generation_config.model + ) + estimated_cost = ( + tokens_in_millions[0] * cost_per_million, + tokens_in_millions[1] * cost_per_million, + ) + estimated_time = ( + tokens_in_millions[0] * 10 / 60, + tokens_in_millions[1] * 10 / 60, + ) + + return { + "message": "Ran Deduplication Estimate (not the actual run). Note that these are estimated ranges.", + "num_entities": num_entities, + "estimated_llm_calls": _get_str_estimation_output( + estimated_llm_calls + ), + "estimated_total_in_out_tokens_in_millions": _get_str_estimation_output( + tokens_in_millions + ), + "estimated_cost_in_usd": _get_str_estimation_output( + estimated_cost + ), + "estimated_total_time_in_minutes": _get_str_estimation_output( + estimated_time + ), + } + except UndefinedTableError: + raise R2RException( + "Entity embedding table not found. 
Please run `extract-triples` first.", + 404, + ) + except Exception as e: + logger.error(f"Error in get_deduplication_estimate: {str(e)}") + raise HTTPException(500, "Error fetching deduplication estimate.") + + async def get_entities( + self, + parent_id: UUID, + offset: int, + limit: int, + entity_ids: Optional[list[UUID]] = None, + entity_names: Optional[list[str]] = None, + include_embeddings: bool = False, + ) -> tuple[list[Entity], int]: + """ + Get entities for a graph. + + Args: + offset: Number of records to skip + limit: Maximum number of records to return (-1 for no limit) + parent_id: UUID of the collection + entity_ids: Optional list of entity IDs to filter by + entity_names: Optional list of entity names to filter by + include_embeddings: Whether to include embeddings in the response + + Returns: + Tuple of (list of entities, total count) + """ + conditions = ["parent_id = $1"] + params: list[Any] = [parent_id] + param_index = 2 + + if entity_ids: + conditions.append(f"id = ANY(${param_index})") + params.append(entity_ids) + param_index += 1 + + if entity_names: + conditions.append(f"name = ANY(${param_index})") + params.append(entity_names) + param_index += 1 + + # Count query - uses the same conditions but without offset/limit + COUNT_QUERY = f""" + SELECT COUNT(*) + FROM {self._get_table_name("graphs_entities")} + WHERE {' AND '.join(conditions)} + """ + count = ( + await self.connection_manager.fetch_query(COUNT_QUERY, params) + )[0]["count"] + + # Define base columns to select + select_fields = """ + id, name, category, description, parent_id, + chunk_ids, metadata + """ + if include_embeddings: + select_fields += ", description_embedding" + + # Main query for fetching entities with pagination + QUERY = f""" + SELECT {select_fields} + FROM {self._get_table_name("graphs_entities")} + WHERE {' AND '.join(conditions)} + ORDER BY created_at + OFFSET ${param_index} + """ + params.append(offset) + param_index += 1 + + if limit != -1: + QUERY += f" LIMIT ${param_index}" + params.append(limit) + + rows = await self.connection_manager.fetch_query(QUERY, params) + + entities = [] + for row in rows: + entity_dict = dict(row) + if isinstance(entity_dict["metadata"], str): + try: + entity_dict["metadata"] = json.loads( + entity_dict["metadata"] + ) + except json.JSONDecodeError: + pass + + entities.append(Entity(**entity_dict)) + + return entities, count + + async def get_relationships( + self, + parent_id: UUID, + offset: int, + limit: int, + relationship_ids: Optional[list[UUID]] = None, + relationship_types: Optional[list[str]] = None, + include_embeddings: bool = False, + ) -> tuple[list[Relationship], int]: + """ + Get relationships for a graph. 
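+ Embeddings are returned only when include_embeddings is set; otherwise just the relational fields are selected.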
+ + Args: + parent_id: UUID of the graph + offset: Number of records to skip + limit: Maximum number of records to return (-1 for no limit) + relationship_ids: Optional list of relationship IDs to filter by + relationship_types: Optional list of relationship types to filter by + include_metadata: Whether to include metadata in the response + + Returns: + Tuple of (list of relationships, total count) + """ + conditions = ["parent_id = $1"] + params: list[Any] = [parent_id] + param_index = 2 + + if relationship_ids: + conditions.append(f"id = ANY(${param_index})") + params.append(relationship_ids) + param_index += 1 + + if relationship_types: + conditions.append(f"predicate = ANY(${param_index})") + params.append(relationship_types) + param_index += 1 + + # Count query - uses the same conditions but without offset/limit + COUNT_QUERY = f""" + SELECT COUNT(*) + FROM {self._get_table_name("graphs_relationships")} + WHERE {' AND '.join(conditions)} + """ + count = ( + await self.connection_manager.fetch_query(COUNT_QUERY, params) + )[0]["count"] + + # Define base columns to select + select_fields = """ + id, subject, predicate, object, weight, chunk_ids, parent_id, metadata + """ + if include_embeddings: + select_fields += ", description_embedding" + + # Main query for fetching relationships with pagination + QUERY = f""" + SELECT {select_fields} + FROM {self._get_table_name("graphs_relationships")} + WHERE {' AND '.join(conditions)} + ORDER BY created_at + OFFSET ${param_index} + """ + params.append(offset) + param_index += 1 + + if limit != -1: + QUERY += f" LIMIT ${param_index}" + params.append(limit) + + rows = await self.connection_manager.fetch_query(QUERY, params) + + relationships = [] + for row in rows: + relationship_dict = dict(row) + if isinstance(relationship_dict["metadata"], str): + try: + relationship_dict["metadata"] = json.loads( + relationship_dict["metadata"] + ) + except json.JSONDecodeError: + pass + + relationships.append(Relationship(**relationship_dict)) + + return relationships, count + + async def add_entities( + self, + entities: list[Entity], + table_name: str, + conflict_columns: list[str] = [], + ) -> asyncpg.Record: + """ + Upsert entities into the entities_raw table. These are raw entities extracted from the document. + + Args: + entities: list[Entity]: list of entities to upsert + collection_name: str: name of the collection + + Returns: + result: asyncpg.Record: result of the upsert operation + """ + cleaned_entities = [] + for entity in entities: + entity_dict = entity.to_dict() + entity_dict["chunk_ids"] = ( + entity_dict["chunk_ids"] + if entity_dict.get("chunk_ids") + else [] + ) + entity_dict["description_embedding"] = ( + str(entity_dict["description_embedding"]) + if entity_dict.get("description_embedding") # type: ignore + else None + ) + cleaned_entities.append(entity_dict) + + return await _add_objects( + objects=cleaned_entities, + full_table_name=self._get_table_name(table_name), + connection_manager=self.connection_manager, + conflict_columns=conflict_columns, + ) + + async def delete_node_via_document_id( + self, document_id: UUID, collection_id: UUID + ) -> None: + # don't delete if status is PROCESSING. 
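+ # The collection's graph_cluster_status is checked first; the document's extracted entities and relationships are removed only when no clustering run is in progress.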
+ QUERY = f""" + SELECT graph_cluster_status FROM {self._get_table_name("collections")} WHERE id = $1 + """ + status = ( + await self.connection_manager.fetch_query(QUERY, [collection_id]) + )[0]["graph_cluster_status"] + if status == KGExtractionStatus.PROCESSING.value: + return + + # Execute separate DELETE queries + delete_queries = [ + f"""DELETE FROM {self._get_table_name("documents_relationships")} WHERE parent_id = $1""", + f"""DELETE FROM {self._get_table_name("documents_entities")} WHERE parent_id = $1""", + ] + + for query in delete_queries: + await self.connection_manager.execute_query(query, [document_id]) + return None + + async def get_all_relationships( + self, + collection_id: UUID | None, + graph_id: UUID | None, + document_ids: Optional[list[UUID]] = None, + ) -> list[Relationship]: + + QUERY = f""" + SELECT id, subject, predicate, weight, object, parent_id FROM {self._get_table_name("graphs_relationships")} WHERE parent_id = ANY($1) + """ + relationships = await self.connection_manager.fetch_query( + QUERY, [collection_id] + ) + + return [Relationship(**relationship) for relationship in relationships] + + async def has_document(self, graph_id: UUID, document_id: UUID) -> bool: + """ + Check if a document exists in the graph's document_ids array. + + Args: + graph_id (UUID): ID of the graph to check + document_id (UUID): ID of the document to look for + + Returns: + bool: True if document exists in graph, False otherwise + + Raises: + R2RException: If graph not found + """ + QUERY = f""" + SELECT EXISTS ( + SELECT 1 + FROM {self._get_table_name("graphs")} + WHERE id = $1 + AND document_ids IS NOT NULL + AND $2 = ANY(document_ids) + ) as exists; + """ + + result = await self.connection_manager.fetchrow_query( + QUERY, [graph_id, document_id] + ) + + if result is None: + raise R2RException(f"Graph {graph_id} not found", 404) + + return result["exists"] + + async def get_communities( + self, + parent_id: UUID, + offset: int, + limit: int, + community_ids: Optional[list[UUID]] = None, + include_embeddings: bool = False, + ) -> tuple[list[Community], int]: + """ + Get communities for a graph. 
+ + Args: + collection_id: UUID of the collection + offset: Number of records to skip + limit: Maximum number of records to return (-1 for no limit) + community_ids: Optional list of community IDs to filter by + include_embeddings: Whether to include embeddings in the response + + Returns: + Tuple of (list of communities, total count) + """ + conditions = ["collection_id = $1"] + params: list[Any] = [parent_id] + param_index = 2 + + if community_ids: + conditions.append(f"id = ANY(${param_index})") + params.append(community_ids) + param_index += 1 + + select_fields = """ + id, collection_id, name, summary, findings, rating, rating_explanation + """ + if include_embeddings: + select_fields += ", description_embedding" + + COUNT_QUERY = f""" + SELECT COUNT(*) + FROM {self._get_table_name("graphs_communities")} + WHERE {' AND '.join(conditions)} + """ + count = ( + await self.connection_manager.fetch_query(COUNT_QUERY, params) + )[0]["count"] + + QUERY = f""" + SELECT {select_fields} + FROM {self._get_table_name("graphs_communities")} + WHERE {' AND '.join(conditions)} + ORDER BY created_at + OFFSET ${param_index} + """ + params.append(offset) + param_index += 1 + + if limit != -1: + QUERY += f" LIMIT ${param_index}" + params.append(limit) + + rows = await self.connection_manager.fetch_query(QUERY, params) + + communities = [] + for row in rows: + community_dict = dict(row) + communities.append(Community(**community_dict)) + + return communities, count + + async def add_community(self, community: Community) -> None: + + # TODO: Fix in the short term. + # we need to do this because postgres insert needs to be a string + community.description_embedding = str(community.description_embedding) # type: ignore[assignment] + + non_null_attrs = { + k: v for k, v in community.__dict__.items() if v is not None + } + columns = ", ".join(non_null_attrs.keys()) + placeholders = ", ".join(f"${i+1}" for i in range(len(non_null_attrs))) + + conflict_columns = ", ".join( + [f"{k} = EXCLUDED.{k}" for k in non_null_attrs] + ) + + QUERY = f""" + INSERT INTO {self._get_table_name("graphs_communities")} ({columns}) + VALUES ({placeholders}) + ON CONFLICT (community_id, level, collection_id) DO UPDATE SET + {conflict_columns} + """ + + await self.connection_manager.execute_many( + QUERY, [tuple(non_null_attrs.values())] + ) + + async def delete_graph_for_collection( + self, collection_id: UUID, cascade: bool = False + ) -> None: + + # don't delete if status is PROCESSING. + QUERY = f""" + SELECT graph_cluster_status FROM {self._get_table_name("collections")} WHERE collection_id = $1 + """ + status = ( + await self.connection_manager.fetch_query(QUERY, [collection_id]) + )[0]["graph_cluster_status"] + if status == KGExtractionStatus.PROCESSING.value: + return + + # remove all relationships for these documents. + DELETE_QUERIES = [ + f"DELETE FROM {self._get_table_name('graphs_communities')} WHERE collection_id = $1;", + ] + + # FIXME: This was using the pagination defaults from before... We need to review if this is as intended. + document_ids_response = ( + await self.collections_handler.documents_in_collection( + offset=0, + limit=100, + collection_id=collection_id, + ) + ) + + # This type ignore is due to insufficient typing of the documents_in_collection method + document_ids = [doc.id for doc in document_ids_response["results"]] # type: ignore + + # TODO: make these queries more efficient. Pass the document_ids as params. 
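+ # With cascade enabled, graph-level entities and relationships derived from the collection's documents are dropped in addition to the community rows queued above.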
+ if cascade: + DELETE_QUERIES += [ + f"DELETE FROM {self._get_table_name('graphs_relationships')} WHERE document_id = ANY($1::uuid[]);", + f"DELETE FROM {self._get_table_name('graphs_entities')} WHERE document_id = ANY($1::uuid[]);", + f"DELETE FROM {self._get_table_name('graphs_entities')} WHERE collection_id = $1;", + ] + + # setting the kg_creation_status to PENDING for this collection. + QUERY = f""" + UPDATE {self._get_table_name("documents")} SET extraction_status = $1 WHERE $2::uuid = ANY(collection_ids) + """ + await self.connection_manager.execute_query( + QUERY, [KGExtractionStatus.PENDING, collection_id] + ) + + for query in DELETE_QUERIES: + if "community" in query or "graphs_entities" in query: + await self.connection_manager.execute_query( + query, [collection_id] + ) + else: + await self.connection_manager.execute_query( + query, [document_ids] + ) + + # set status to PENDING for this collection. + QUERY = f""" + UPDATE {self._get_table_name("collections")} SET graph_cluster_status = $1 WHERE collection_id = $2 + """ + await self.connection_manager.execute_query( + QUERY, [KGExtractionStatus.PENDING, collection_id] + ) + + async def perform_graph_clustering( + self, + collection_id: UUID, + leiden_params: dict[str, Any], + ) -> Tuple[int, Any]: + """ + Leiden clustering algorithm to cluster the knowledge graph relationships into communities. + + Available parameters and defaults: + max_cluster_size: int = 1000, + starting_communities: Optional[dict[str, int]] = None, + extra_forced_iterations: int = 0, + resolution: int | float = 1.0, + randomness: int | float = 0.001, + use_modularity: bool = True, + random_seed: Optional[int] = None, + weight_attribute: str = "weight", + is_weighted: Optional[bool] = None, + weight_default: int| float = 1.0, + check_directed: bool = True, + """ + + offset = 0 + page_size = 1000 # Increased batch size for efficiency + all_relationships = [] + while True: + relationships, count = await self.relationships.get( + parent_id=collection_id, + store_type=StoreType.GRAPHS, + offset=offset, + limit=page_size, + ) + + if not relationships: + break + + all_relationships.extend(relationships) + offset += len(relationships) + + if offset >= count: + break + + relationship_ids_cache = await self._get_relationship_ids_cache( + relationships + ) + + logger.info( + f"Clustering over {len(all_relationships)} relationships for {collection_id} with settings: {leiden_params}" + ) + return await self._cluster_and_add_community_info( + relationships=relationships, + relationship_ids_cache=relationship_ids_cache, + leiden_params=leiden_params, + collection_id=collection_id, + ) + + async def get_entity_map( + self, offset: int, limit: int, document_id: UUID + ) -> dict[str, dict[str, list[dict[str, Any]]]]: + + QUERY1 = f""" + WITH entities_list AS ( + SELECT DISTINCT name + FROM {self._get_table_name("documents_entities")} + WHERE parent_id = $1 + ORDER BY name ASC + LIMIT {limit} OFFSET {offset} + ) + SELECT e.name, e.description, e.category, + (SELECT array_agg(DISTINCT x) FROM unnest(e.chunk_ids) x) AS chunk_ids, + e.parent_id + FROM {self._get_table_name("documents_entities")} e + JOIN entities_list el ON e.name = el.name + GROUP BY e.name, e.description, e.category, e.chunk_ids, e.parent_id + ORDER BY e.name;""" + + entities_list = await self.connection_manager.fetch_query( + QUERY1, [document_id] + ) + entities_list = [Entity(**entity) for entity in entities_list] + + QUERY2 = f""" + WITH entities_list AS ( + + SELECT DISTINCT name + FROM 
{self._get_table_name("documents_entities")} + WHERE parent_id = $1 + ORDER BY name ASC + LIMIT {limit} OFFSET {offset} + ) + + SELECT DISTINCT t.subject, t.predicate, t.object, t.weight, t.description, + (SELECT array_agg(DISTINCT x) FROM unnest(t.chunk_ids) x) AS chunk_ids, t.parent_id + FROM {self._get_table_name("documents_relationships")} t + JOIN entities_list el ON t.subject = el.name + ORDER BY t.subject, t.predicate, t.object; + """ + + relationships_list = await self.connection_manager.fetch_query( + QUERY2, [document_id] + ) + relationships_list = [ + Relationship(**relationship) for relationship in relationships_list + ] + + entity_map: dict[str, dict[str, list[Any]]] = {} + for entity in entities_list: + if entity.name not in entity_map: + entity_map[entity.name] = {"entities": [], "relationships": []} + entity_map[entity.name]["entities"].append(entity) + + for relationship in relationships_list: + if relationship.subject in entity_map: + entity_map[relationship.subject]["relationships"].append( + relationship + ) + if relationship.object in entity_map: + entity_map[relationship.object]["relationships"].append( + relationship + ) + + return entity_map + + def _build_filters( + self, filters: dict, parameters: list[Union[str, int, bytes]] + ) -> str: + def parse_condition(key: str, value: Any) -> str: # type: ignore + # nonlocal parameters + if key == "collection_ids": + if isinstance(value, dict): + op, clause = next(iter(value.items())) + if op == "$overlap": + # Match if collection_id equals any of the provided IDs + parameters.append(clause) # Add the whole array of IDs + + return f"parent_id = ANY(${len(parameters)})" # TODO - this is hard coded to assume graph id - collection id + raise Exception( + "Unknown filter for `collection_ids`, only `$overlap` is supported" + ) + elif key == "document_id": + logger.warning( + "Filtering by `document_id` is not supported with graph search, ignoring." + ) + elif key == "chunk_id": + logger.warning( + "Filtering by `chunk_id` is not supported with graph search, ignoring." + ) + elif key == "user_id": + logger.warning( + "Filtering by `user_id` is not supported with graph search, ignoring. Use `collection_ids` instead." 
+ ) + + else: + # Handle JSON-based filters + json_col = "metadata" + if key.startswith("metadata."): + key = key.split("metadata.")[1] + if isinstance(value, dict): + op, clause = next(iter(value.items())) + if op not in ( + "$eq", + "$ne", + "$lt", + "$lte", + "$gt", + "$gte", + "$in", + "$contains", + ): + raise Exception("unknown operator") + + if op == "$eq": + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' = ${len(parameters)}::jsonb" + ) + elif op == "$ne": + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' != ${len(parameters)}::jsonb" + ) + elif op == "$lt": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float < (${len(parameters)}::jsonb)::float" + elif op == "$lte": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float <= (${len(parameters)}::jsonb)::float" + elif op == "$gt": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float > (${len(parameters)}::jsonb)::float" + elif op == "$gte": + parameters.append(json.dumps(clause)) + return f"({json_col}->'{key}')::float >= (${len(parameters)}::jsonb)::float" + elif op == "$in": + if not isinstance(clause, list): + raise Exception( + "argument to $in filter must be a list" + ) + parameters.append(json.dumps(clause)) + return f"{json_col}->'{key}' = ANY(SELECT jsonb_array_elements(${len(parameters)}::jsonb))" + elif op == "$contains": + if not isinstance(clause, (int, str, float, list)): + raise Exception( + "argument to $contains filter must be a scalar or array" + ) + parameters.append(json.dumps(clause)) + return ( + f"{json_col}->'{key}' @> ${len(parameters)}::jsonb" + ) + + def parse_filter(filter_dict: dict) -> str: + filter_conditions = [] + for key, value in filter_dict.items(): + if key == "$and": + and_conditions = [ + parse_filter(f) for f in value if f + ] # Skip empty dictionaries + if and_conditions: + filter_conditions.append( + f"({' AND '.join(and_conditions)})" + ) + elif key == "$or": + or_conditions = [ + parse_filter(f) for f in value if f + ] # Skip empty dictionaries + if or_conditions: + filter_conditions.append( + f"({' OR '.join(or_conditions)})" + ) + else: + filter_conditions.append(parse_condition(key, value)) + + # Check if there is only a single condition + if len(filter_conditions) == 1: + return filter_conditions[0] + else: + return " AND ".join(filter_conditions) + + where_clause = parse_filter(filters) + + return where_clause + + async def graph_search( + self, query: str, **kwargs: Any + ) -> AsyncGenerator[Any, None]: + """ + Perform semantic search with similarity scores while maintaining exact same structure. + """ + query_embedding = kwargs.get("query_embedding", None) + search_type = kwargs.get("search_type", "entities") + embedding_type = kwargs.get("embedding_type", "description_embedding") + property_names = kwargs.get("property_names", ["name", "description"]) + if "metadata" not in property_names: + property_names.append("metadata") + # if search_type == "community" and "collection_id" not in property_names: + # property_names.append("collection_id") + + filters = kwargs.get("filters", {}) + limit = kwargs.get("limit", 10) + use_fulltext_search = kwargs.get("use_fulltext_search", True) + use_hybrid_search = kwargs.get("use_hybrid_search", True) + if use_hybrid_search or use_fulltext_search: + logger.warning( + "Hybrid and fulltext search not supported for graph search, ignoring." 
+ ) + + table_name = f"graphs_{search_type}" + property_names_str = ", ".join(property_names) + where_clause = "" + params: list[Union[str, int, bytes]] = [str(query_embedding), limit] + if filters: + where_clause = self._build_filters(filters, params) + where_clause = f"WHERE {where_clause}" + + # Modified query to include similarity score while keeping same structure + QUERY = f""" + SELECT + {property_names_str}, + ({embedding_type} <=> $1) as similarity_score + FROM {self._get_table_name(table_name)} {where_clause} + ORDER BY {embedding_type} <=> $1 + LIMIT $2; + """ + results = await self.connection_manager.fetch_query( + QUERY, tuple(params) + ) + + for result in results: + # import pdb; pdb.set_trace() + output = { + property_name: result[property_name] + for property_name in property_names + } + output["similarity_score"] = 1 - float(result["similarity_score"]) + yield output + + async def _create_graph_and_cluster( + self, relationships: list[Relationship], leiden_params: dict[str, Any] + ) -> Any: + + G = self.nx.Graph() + for relationship in relationships: + G.add_edge( + relationship.subject, + relationship.object, + weight=relationship.weight, + id=relationship.id, + ) + + logger.info(f"Graph has {len(G.nodes)} nodes and {len(G.edges)} edges") + + return await self._compute_leiden_communities(G, leiden_params) + + async def _cluster_and_add_community_info( + self, + relationships: list[Relationship], + relationship_ids_cache: dict[str, list[int]], + leiden_params: dict[str, Any], + collection_id: Optional[UUID] = None, + ) -> Tuple[int, Any]: + + # clear if there is any old information + conditions = [] + if collection_id is not None: + conditions.append("collection_id = $1") + + await asyncio.sleep(0.1) + + start_time = time.time() + + logger.info(f"Creating graph and clustering for {collection_id}") + + hierarchical_communities = await self._create_graph_and_cluster( + relationships=relationships, + leiden_params=leiden_params, + ) + + logger.info( + f"Computing Leiden communities completed, time {time.time() - start_time:.2f} seconds." + ) + + def relationship_ids(node: str) -> list[int]: + return relationship_ids_cache.get(node, []) + + logger.info( + f"Cached {len(relationship_ids_cache)} relationship ids, time {time.time() - start_time:.2f} seconds." + ) + + num_communities = ( + max(item.cluster for item in hierarchical_communities) + 1 + ) + + logger.info( + f"Generated {num_communities} communities, time {time.time() - start_time:.2f} seconds." 
+ ) + + return num_communities, hierarchical_communities + + async def _get_relationship_ids_cache( + self, relationships: list[Relationship] + ) -> dict[str, list[int]]: + + # caching the relationship ids + relationship_ids_cache = dict[str, list[int | UUID]]() + for relationship in relationships: + if ( + relationship.subject not in relationship_ids_cache + and relationship.subject is not None + ): + relationship_ids_cache[relationship.subject] = [] + if ( + relationship.object not in relationship_ids_cache + and relationship.object is not None + ): + relationship_ids_cache[relationship.object] = [] + if ( + relationship.subject is not None + and relationship.id is not None + ): + relationship_ids_cache[relationship.subject].append( + relationship.id + ) + if relationship.object is not None and relationship.id is not None: + relationship_ids_cache[relationship.object].append( + relationship.id + ) + + return relationship_ids_cache # type: ignore + + async def _compute_leiden_communities( + self, + graph: Any, + leiden_params: dict[str, Any], + ) -> Any: + """Compute Leiden communities.""" + try: + from graspologic.partition import hierarchical_leiden + + if "random_seed" not in leiden_params: + leiden_params["random_seed"] = ( + 7272 # add seed to control randomness + ) + + start_time = time.time() + logger.info( + f"Running Leiden clustering with params: {leiden_params}" + ) + + community_mapping = hierarchical_leiden(graph, **leiden_params) + + logger.info( + f"Leiden clustering completed in {time.time() - start_time:.2f} seconds." + ) + return community_mapping + + except ImportError as e: + raise ImportError("Please install the graspologic package.") from e + + async def get_existing_document_entity_chunk_ids( + self, document_id: UUID + ) -> list[str]: + QUERY = f""" + SELECT DISTINCT unnest(chunk_ids) AS chunk_id FROM {self._get_table_name("documents_entities")} WHERE parent_id = $1 + """ + return [ + item["chunk_id"] + for item in await self.connection_manager.fetch_query( + QUERY, [document_id] + ) + ] + + async def get_entity_count( + self, + collection_id: Optional[UUID] = None, + document_id: Optional[UUID] = None, + distinct: bool = False, + entity_table_name: str = "entity", + ) -> int: + + if collection_id is None and document_id is None: + raise ValueError( + "Either collection_id or document_id must be provided." 
+ ) + + conditions = ["parent_id = $1"] + params = [str(document_id)] + + count_value = "DISTINCT name" if distinct else "*" + + QUERY = f""" + SELECT COUNT({count_value}) FROM {self._get_table_name(entity_table_name)} + WHERE {" AND ".join(conditions)} + """ + + return (await self.connection_manager.fetch_query(QUERY, params))[0][ + "count" + ] + + async def update_entity_descriptions(self, entities: list[Entity]): + + query = f""" + UPDATE {self._get_table_name("graphs_entities")} + SET description = $3, description_embedding = $4 + WHERE name = $1 AND graph_id = $2 + """ + + inputs = [ + ( + entity.name, + entity.parent_id, + entity.description, + entity.description_embedding, + ) + for entity in entities + ] + + await self.connection_manager.execute_many(query, inputs) # type: ignore + + +def _json_serialize(obj): + if isinstance(obj, UUID): + return str(obj) + elif isinstance(obj, (datetime.datetime, datetime.date)): + return obj.isoformat() + raise TypeError(f"Object of type {type(obj)} is not JSON serializable") + + +async def _add_objects( + objects: list[dict], + full_table_name: str, + connection_manager: PostgresConnectionManager, + conflict_columns: list[str] = [], + exclude_metadata: list[str] = [], +) -> list[UUID]: + """ + Bulk insert objects into the specified table using jsonb_to_recordset. + """ + + # Exclude specified metadata and prepare data + cleaned_objects = [] + for obj in objects: + cleaned_obj = { + k: v + for k, v in obj.items() + if k not in exclude_metadata and v is not None + } + cleaned_objects.append(cleaned_obj) + + # Serialize the list of objects to JSON + json_data = json.dumps(cleaned_objects, default=_json_serialize) + + # Prepare the column definitions for jsonb_to_recordset + + columns = cleaned_objects[0].keys() + column_defs = [] + for col in columns: + # Map Python types to PostgreSQL types + sample_value = cleaned_objects[0][col] + if "embedding" in col: + pg_type = "vector" + elif "chunk_ids" in col or "document_ids" in col or "graph_ids" in col: + pg_type = "uuid[]" + elif col == "id" or "_id" in col: + pg_type = "uuid" + elif isinstance(sample_value, str): + pg_type = "text" + elif isinstance(sample_value, UUID): + pg_type = "uuid" + elif isinstance(sample_value, (int, float)): + pg_type = "numeric" + elif isinstance(sample_value, list) and all( + isinstance(x, UUID) for x in sample_value + ): + pg_type = "uuid[]" + elif isinstance(sample_value, list): + pg_type = "jsonb" + elif isinstance(sample_value, dict): + pg_type = "jsonb" + elif isinstance(sample_value, bool): + pg_type = "boolean" + elif isinstance(sample_value, (datetime.datetime, datetime.date)): + pg_type = "timestamp" + else: + raise TypeError( + f"Unsupported data type for column '{col}': {type(sample_value)}" + ) + + column_defs.append(f"{col} {pg_type}") + + columns_str = ", ".join(columns) + column_defs_str = ", ".join(column_defs) + + if conflict_columns: + conflict_columns_str = ", ".join(conflict_columns) + update_columns_str = ", ".join( + f"{col}=EXCLUDED.{col}" + for col in columns + if col not in conflict_columns + ) + on_conflict_clause = f"ON CONFLICT ({conflict_columns_str}) DO UPDATE SET {update_columns_str}" + else: + on_conflict_clause = "" + + QUERY = f""" + INSERT INTO {full_table_name} ({columns_str}) + SELECT {columns_str} + FROM jsonb_to_recordset($1::jsonb) + AS x({column_defs_str}) + {on_conflict_clause} + RETURNING id; + """ + + # Execute the query + result = await connection_manager.fetch_query(QUERY, [json_data]) + + # Extract and return the IDs + return 
[record["id"] for record in result] diff --git a/py/core/providers/database/kg.py b/py/core/providers/database/kg.py deleted file mode 100644 index b233041dd..000000000 --- a/py/core/providers/database/kg.py +++ /dev/null @@ -1,1643 +0,0 @@ -import json -import logging -import time -from typing import Any, AsyncGenerator, Optional, Tuple -from uuid import UUID - -import asyncpg -from asyncpg.exceptions import PostgresError, UndefinedTableError -from fastapi import HTTPException - -from core.base import ( - CommunityReport, - Entity, - KGExtraction, - KGExtractionStatus, - KGHandler, - R2RException, - Triple, -) -from core.base.abstractions import ( - CommunityInfo, - EntityLevel, - KGCreationSettings, - KGEnrichmentSettings, - KGEnrichmentStatus, - KGEntityDeduplicationSettings, - VectorQuantizationType, -) -from core.base.api.models import ( - KGCreationEstimationResponse, - KGDeduplicationEstimationResponse, - KGEnrichmentEstimationResponse, -) -from core.base.utils import _decorate_vector_type, llm_cost_per_million_tokens - -from .base import PostgresConnectionManager -from .collection import PostgresCollectionHandler - -logger = logging.getLogger() - - -class PostgresKGHandler(KGHandler): - """Handler for Knowledge Graph operations in PostgreSQL.""" - - def __init__( - self, - project_name: str, - connection_manager: PostgresConnectionManager, - collection_handler: PostgresCollectionHandler, - dimension: int, - quantization_type: VectorQuantizationType, - *args: Any, - **kwargs: Any, - ) -> None: - """Initialize the handler with the same signature as the original provider.""" - super().__init__(project_name, connection_manager) - self.collection_handler = collection_handler - self.dimension = dimension - self.quantization_type = quantization_type - try: - import networkx as nx - - self.nx = nx - except ImportError as exc: - raise ImportError( - "NetworkX is not installed. Please install it to use this module." - ) from exc - - def _get_table_name(self, base_name: str) -> str: - """Get the fully qualified table name.""" - return f"{self.project_name}.{base_name}" - - async def create_tables(self): - # raw entities table - # create schema - - vector_column_str = _decorate_vector_type( - f"({self.dimension})", self.quantization_type - ) - - query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name("chunk_entity")} ( - id SERIAL PRIMARY KEY, - category TEXT NOT NULL, - name TEXT NOT NULL, - description TEXT NOT NULL, - extraction_ids UUID[] NOT NULL, - document_id UUID NOT NULL, - attributes JSONB - ); - """ - await self.connection_manager.execute_query(query) - - # raw triples table, also the final table. this will have embeddings. 
- query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name("chunk_triple")} ( - id SERIAL PRIMARY KEY, - subject TEXT NOT NULL, - predicate TEXT NOT NULL, - object TEXT NOT NULL, - weight FLOAT NOT NULL, - description TEXT NOT NULL, - embedding {vector_column_str}, - extraction_ids UUID[] NOT NULL, - document_id UUID NOT NULL, - attributes JSONB NOT NULL - ); - """ - await self.connection_manager.execute_query(query) - - # embeddings tables - query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name("document_entity")} ( - id SERIAL PRIMARY KEY, - name TEXT NOT NULL, - description TEXT NOT NULL, - extraction_ids UUID[] NOT NULL, - description_embedding {vector_column_str} NOT NULL, - document_id UUID NOT NULL, - UNIQUE (name, document_id) - ); - """ - - await self.connection_manager.execute_query(query) - - # deduplicated entities table - query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name("collection_entity")} ( - id SERIAL PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - extraction_ids UUID[] NOT NULL, - document_ids UUID[] NOT NULL, - collection_id UUID NOT NULL, - description_embedding {vector_column_str}, - attributes JSONB, - UNIQUE (name, collection_id, attributes) - );""" - - await self.connection_manager.execute_query(query) - - # communities table, result of the Leiden algorithm - query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name("community_info")} ( - id SERIAL PRIMARY KEY, - node TEXT NOT NULL, - cluster INT NOT NULL, - parent_cluster INT, - level INT NOT NULL, - is_final_cluster BOOLEAN NOT NULL, - triple_ids INT[] NOT NULL, - collection_id UUID NOT NULL - );""" - - await self.connection_manager.execute_query(query) - - # communities_report table - query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name("community_report")} ( - id SERIAL PRIMARY KEY, - community_number INT NOT NULL, - collection_id UUID NOT NULL, - level INT NOT NULL, - name TEXT NOT NULL, - summary TEXT NOT NULL, - findings TEXT[] NOT NULL, - rating FLOAT NOT NULL, - rating_explanation TEXT NOT NULL, - embedding {vector_column_str} NOT NULL, - attributes JSONB, - UNIQUE (community_number, level, collection_id) - );""" - - await self.connection_manager.execute_query(query) - - async def _add_objects( - self, - objects: list[Any], - table_name: str, - conflict_columns: list[str] = [], - ) -> asyncpg.Record: - """ - Upsert objects into the specified table. 
- """ - # Get non-null attributes from the first object - non_null_attrs = {k: v for k, v in objects[0].items() if v is not None} - columns = ", ".join(non_null_attrs.keys()) - - placeholders = ", ".join(f"${i+1}" for i in range(len(non_null_attrs))) - - if conflict_columns: - conflict_columns_str = ", ".join(conflict_columns) - replace_columns_str = ", ".join( - f"{column} = EXCLUDED.{column}" for column in non_null_attrs - ) - on_conflict_query = f"ON CONFLICT ({conflict_columns_str}) DO UPDATE SET {replace_columns_str}" - else: - on_conflict_query = "" - - QUERY = f""" - INSERT INTO {self._get_table_name(table_name)} ({columns}) - VALUES ({placeholders}) - {on_conflict_query} - """ - - # Filter out null values for each object - params = [ - tuple( - (json.dumps(v) if isinstance(v, dict) else v) - for v in obj.values() - if v is not None - ) - for obj in objects - ] - - return await self.connection_manager.execute_many(QUERY, params) # type: ignore - - async def add_entities( - self, - entities: list[Entity], - table_name: str, - conflict_columns: list[str] = [], - ) -> asyncpg.Record: - """ - Upsert entities into the entities_raw table. These are raw entities extracted from the document. - - Args: - entities: list[Entity]: list of entities to upsert - collection_name: str: name of the collection - - Returns: - result: asyncpg.Record: result of the upsert operation - """ - cleaned_entities = [] - for entity in entities: - entity_dict = entity.to_dict() - entity_dict["extraction_ids"] = ( - entity_dict["extraction_ids"] - if entity_dict.get("extraction_ids") - else [] - ) - entity_dict["description_embedding"] = ( - str(entity_dict["description_embedding"]) - if entity_dict.get("description_embedding") - else None - ) - cleaned_entities.append(entity_dict) - - return await self._add_objects( - cleaned_entities, table_name, conflict_columns - ) - - async def add_triples( - self, - triples: list[Triple], - table_name: str = "chunk_triple", - ) -> None: - """ - Upsert triples into the chunk_triple table. These are raw triples extracted from the document. - - Args: - triples: list[Triple]: list of triples to upsert - table_name: str: name of the table to upsert into - - Returns: - result: asyncpg.Record: result of the upsert operation - """ - return await self._add_objects( - [ele.to_dict() for ele in triples], table_name - ) - - async def add_kg_extractions( - self, - kg_extractions: list[KGExtraction], - table_prefix: str = "chunk_", - ) -> Tuple[int, int]: - """ - Upsert entities and triples into the database. These are raw entities and triples extracted from the document fragments. 
- - Args: - kg_extractions: list[KGExtraction]: list of KG extractions to upsert - table_prefix: str: prefix to add to the table names - - Returns: - total_entities: int: total number of entities upserted - total_relationships: int: total number of relationships upserted - """ - - total_entities, total_relationships = 0, 0 - - for extraction in kg_extractions: - - total_entities, total_relationships = ( - total_entities + len(extraction.entities), - total_relationships + len(extraction.triples), - ) - - if extraction.entities: - if not extraction.entities[0].extraction_ids: - for i in range(len(extraction.entities)): - extraction.entities[i].extraction_ids = ( - extraction.extraction_ids - ) - extraction.entities[i].document_id = ( - extraction.document_id - ) - - await self.add_entities( - extraction.entities, table_name=f"{table_prefix}entity" - ) - - if extraction.triples: - if not extraction.triples[0].extraction_ids: - for i in range(len(extraction.triples)): - extraction.triples[i].extraction_ids = ( - extraction.extraction_ids - ) - extraction.triples[i].document_id = extraction.document_id - - await self.add_triples( - extraction.triples, table_name=f"{table_prefix}triple" - ) - - return (total_entities, total_relationships) - - async def get_entity_map( - self, offset: int, limit: int, document_id: UUID - ) -> dict[str, dict[str, list[dict[str, Any]]]]: - - QUERY1 = f""" - WITH entities_list AS ( - SELECT DISTINCT name - FROM {self._get_table_name("chunk_entity")} - WHERE document_id = $1 - ORDER BY name ASC - LIMIT {limit} OFFSET {offset} - ) - SELECT e.name, e.description, e.category, - (SELECT array_agg(DISTINCT x) FROM unnest(e.extraction_ids) x) AS extraction_ids, - e.document_id - FROM {self._get_table_name("chunk_entity")} e - JOIN entities_list el ON e.name = el.name - GROUP BY e.name, e.description, e.category, e.extraction_ids, e.document_id - ORDER BY e.name;""" - - entities_list = await self.connection_manager.fetch_query( - QUERY1, [document_id] - ) - entities_list = [ - Entity( - name=entity["name"], - description=entity["description"], - category=entity["category"], - extraction_ids=entity["extraction_ids"], - document_id=entity["document_id"], - ) - for entity in entities_list - ] - - QUERY2 = f""" - WITH entities_list AS ( - - SELECT DISTINCT name - FROM {self._get_table_name("chunk_entity")} - WHERE document_id = $1 - ORDER BY name ASC - LIMIT {limit} OFFSET {offset} - ) - - SELECT DISTINCT t.subject, t.predicate, t.object, t.weight, t.description, - (SELECT array_agg(DISTINCT x) FROM unnest(t.extraction_ids) x) AS extraction_ids, t.document_id - FROM {self._get_table_name("chunk_triple")} t - JOIN entities_list el ON t.subject = el.name - ORDER BY t.subject, t.predicate, t.object; - """ - - triples_list = await self.connection_manager.fetch_query( - QUERY2, [document_id] - ) - triples_list = [ - Triple( - subject=triple["subject"], - predicate=triple["predicate"], - object=triple["object"], - weight=triple["weight"], - description=triple["description"], - extraction_ids=triple["extraction_ids"], - document_id=triple["document_id"], - ) - for triple in triples_list - ] - - entity_map: dict[str, dict[str, list[Any]]] = {} - for entity in entities_list: - if entity.name not in entity_map: - entity_map[entity.name] = {"entities": [], "triples": []} - entity_map[entity.name]["entities"].append(entity) - - for triple in triples_list: - if triple.subject in entity_map: - entity_map[triple.subject]["triples"].append(triple) - if triple.object in entity_map: - 
entity_map[triple.object]["triples"].append(triple) - - return entity_map - - async def upsert_embeddings( - self, - data: list[Tuple[Any]], - table_name: str, - ) -> None: - QUERY = f""" - INSERT INTO {self._get_table_name(table_name)} (name, description, description_embedding, extraction_ids, document_id) - VALUES ($1, $2, $3, $4, $5) - ON CONFLICT (name, document_id) DO UPDATE SET - description = EXCLUDED.description, - description_embedding = EXCLUDED.description_embedding, - extraction_ids = EXCLUDED.extraction_ids, - document_id = EXCLUDED.document_id - """ - return await self.connection_manager.execute_many(QUERY, data) - - async def upsert_entities(self, entities: list[Entity]) -> None: - QUERY = """ - INSERT INTO $1.$2 (category, name, description, description_embedding, extraction_ids, document_id, attributes) - VALUES ($1, $2, $3, $4, $5, $6, $7) - """ - - table_name = self._get_table_name("entities") - query = QUERY.format(table_name) - await self.connection_manager.execute_query(query, entities) - - async def vector_query( # type: ignore - self, query: str, **kwargs: Any - ) -> AsyncGenerator[Any, None]: - - query_embedding = kwargs.get("query_embedding", None) - search_type = kwargs.get("search_type", "__Entity__") - embedding_type = kwargs.get("embedding_type", "description_embedding") - property_names = kwargs.get("property_names", ["name", "description"]) - filters = kwargs.get("filters", {}) - entities_level = kwargs.get("entities_level", EntityLevel.DOCUMENT) - limit = kwargs.get("limit", 10) - - table_name = "" - if search_type == "__Entity__": - table_name = ( - "collection_entity" - if entities_level == EntityLevel.COLLECTION - else "document_entity" - ) - elif search_type == "__Relationship__": - table_name = "chunk_triple" - elif search_type == "__Community__": - table_name = "community_report" - else: - raise ValueError(f"Invalid search type: {search_type}") - - property_names_str = ", ".join(property_names) - - collection_ids_dict = filters.get("collection_ids", {}) - filter_query = "" - if collection_ids_dict: - filter_query = "WHERE collection_id = ANY($3)" - filter_ids = collection_ids_dict["$overlap"] - - if ( - search_type == "__Community__" - or table_name == "collection_entity" - ): - logger.info(f"Searching in collection ids: {filter_ids}") - - elif search_type in ["__Entity__", "__Relationship__"]: - filter_query = "WHERE document_id = ANY($3)" - # TODO - This seems like a hack, we will need a better way to filter by collection ids for entities and relationships - query = f""" - SELECT distinct document_id FROM {self._get_table_name('document_info')} WHERE $1 = ANY(collection_ids) - """ - filter_ids = [ - doc_id["document_id"] - for doc_id in await self.connection_manager.fetch_query( - query, filter_ids - ) - ] - logger.info(f"Searching in document ids: {filter_ids}") - - QUERY = f""" - SELECT {property_names_str} FROM {self._get_table_name(table_name)} {filter_query} ORDER BY {embedding_type} <=> $1 LIMIT $2; - """ - - if filter_query != "": - results = await self.connection_manager.fetch_query( - QUERY, (str(query_embedding), limit, filter_ids) - ) - else: - results = await self.connection_manager.fetch_query( - QUERY, (str(query_embedding), limit) - ) - - for result in results: - yield { - property_name: result[property_name] - for property_name in property_names - } - - async def get_all_triples( - self, collection_id: UUID, document_ids: Optional[list[UUID]] = None - ) -> list[Triple]: - - # getting all documents for a collection - if document_ids 
is None: - QUERY = f""" - select distinct document_id from {self._get_table_name("document_info")} where $1 = ANY(collection_ids) - """ - document_ids_list = await self.connection_manager.fetch_query( - QUERY, [collection_id] - ) - document_ids = [ - doc_id["document_id"] for doc_id in document_ids_list - ] - - QUERY = f""" - SELECT id, subject, predicate, weight, object, document_id FROM {self._get_table_name("chunk_triple")} WHERE document_id = ANY($1) - """ - triples = await self.connection_manager.fetch_query( - QUERY, [document_ids] - ) - return [Triple(**triple) for triple in triples] - - async def add_community_info( - self, communities: list[CommunityInfo] - ) -> None: - QUERY = f""" - INSERT INTO {self._get_table_name("community_info")} (node, cluster, parent_cluster, level, is_final_cluster, triple_ids, collection_id) - VALUES ($1, $2, $3, $4, $5, $6, $7) - """ - communities_tuples_list = [ - ( - community.node, - community.cluster, - community.parent_cluster, - community.level, - community.is_final_cluster, - community.triple_ids, - community.collection_id, - ) - for community in communities - ] - await self.connection_manager.execute_many( - QUERY, communities_tuples_list - ) - - async def get_communities( - self, - collection_id: Optional[UUID] = None, - levels: Optional[list[int]] = None, - community_numbers: Optional[list[int]] = None, - offset: Optional[int] = 0, - limit: Optional[int] = -1, - ) -> dict: - conditions = [] - params: list = [collection_id] - param_index = 2 - - if levels is not None: - conditions.append(f"level = ANY(${param_index})") - params.append(levels) - param_index += 1 - - if community_numbers is not None: - conditions.append(f"community_number = ANY(${param_index})") - params.append(community_numbers) - param_index += 1 - - pagination_params = [] - if offset: - pagination_params.append(f"OFFSET ${param_index}") - params.append(offset) - param_index += 1 - - if limit != -1: - pagination_params.append(f"LIMIT ${param_index}") - params.append(limit) - param_index += 1 - - pagination_clause = " ".join(pagination_params) - - query = f""" - SELECT id, community_number, collection_id, level, name, summary, findings, rating, rating_explanation, COUNT(*) OVER() AS total_entries - FROM {self._get_table_name('community_report')} - WHERE collection_id = $1 - {" AND " + " AND ".join(conditions) if conditions else ""} - ORDER BY community_number - {pagination_clause} - """ - - results = await self.connection_manager.fetch_query(query, params) - total_entries = results[0]["total_entries"] if results else 0 - communities = [CommunityReport(**community) for community in results] - - return { - "communities": communities, - "total_entries": total_entries, - } - - async def add_community_report( - self, community_report: CommunityReport - ) -> None: - - # TODO: Fix in the short term. 
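Editor's note: `get_communities` above pages through community reports while still reporting the full row count by attaching `COUNT(*) OVER()` to the paginated SELECT; the reworked `get_all_prompts` later in this diff uses the same trick. A small sketch of the pattern, with an illustrative table name:

import asyncpg


async def fetch_page(conn: asyncpg.Connection, collection_id, offset: int, limit: int) -> dict:
    # The window function is computed over the filtered result set before
    # OFFSET/LIMIT are applied, so every returned row also carries the total.
    query = """
        SELECT id, name, COUNT(*) OVER() AS total_entries
        FROM demo_communities
        WHERE collection_id = $1
        ORDER BY id
        OFFSET $2 LIMIT $3;
    """
    rows = await conn.fetch(query, collection_id, offset, limit)
    # Caveat shared with the original code: an empty page reports 0 even when matching rows exist.
    total = rows[0]["total_entries"] if rows else 0
    return {"results": [dict(r) for r in rows], "total_entries": total}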
- # we need to do this because postgres insert needs to be a string - community_report.embedding = str(community_report.embedding) # type: ignore[assignment] - - non_null_attrs = { - k: v for k, v in community_report.__dict__.items() if v is not None - } - columns = ", ".join(non_null_attrs.keys()) - placeholders = ", ".join(f"${i+1}" for i in range(len(non_null_attrs))) - - conflict_columns = ", ".join( - [f"{k} = EXCLUDED.{k}" for k in non_null_attrs] - ) - - QUERY = f""" - INSERT INTO {self._get_table_name("community_report")} ({columns}) - VALUES ({placeholders}) - ON CONFLICT (community_number, level, collection_id) DO UPDATE SET - {conflict_columns} - """ - - await self.connection_manager.execute_many( - QUERY, [tuple(non_null_attrs.values())] - ) - - async def _create_graph_and_cluster( - self, triples: list[Triple], leiden_params: dict[str, Any] - ) -> Any: - - G = self.nx.Graph() - for triple in triples: - G.add_edge( - triple.subject, - triple.object, - weight=triple.weight, - id=triple.id, - ) - - hierarchical_communities = await self._compute_leiden_communities( - G, leiden_params - ) - - return hierarchical_communities - - async def _cluster_and_add_community_info( - self, - triples: list[Triple], - triple_ids_cache: dict[str, list[int]], - leiden_params: dict[str, Any], - collection_id: UUID, - ) -> int: - - # clear if there is any old information - QUERY = f""" - DELETE FROM {self._get_table_name("community_info")} WHERE collection_id = $1 - """ - await self.connection_manager.execute_query(QUERY, [collection_id]) - - QUERY = f""" - DELETE FROM {self._get_table_name("community_report")} WHERE collection_id = $1 - """ - await self.connection_manager.execute_query(QUERY, [collection_id]) - - start_time = time.time() - - hierarchical_communities = await self._create_graph_and_cluster( - triples, leiden_params - ) - - logger.info( - f"Computing Leiden communities completed, time {time.time() - start_time:.2f} seconds." - ) - - def triple_ids(node: str) -> list[int]: - return triple_ids_cache.get(node, []) - - logger.info( - f"Cached {len(triple_ids_cache)} triple ids, time {time.time() - start_time:.2f} seconds." - ) - - # upsert the communities into the database. - inputs = [ - CommunityInfo( - node=str(item.node), - cluster=item.cluster, - parent_cluster=item.parent_cluster, - level=item.level, - is_final_cluster=item.is_final_cluster, - triple_ids=triple_ids(item.node), - collection_id=collection_id, - ) - for item in hierarchical_communities - ] - - await self.add_community_info(inputs) - - num_communities = ( - max([item.cluster for item in hierarchical_communities]) + 1 - ) - - logger.info( - f"Generated {num_communities} communities, time {time.time() - start_time:.2f} seconds." - ) - - return num_communities - - async def _use_community_cache( - self, collection_id: UUID, triple_ids_cache: dict[str, list[int]] - ) -> bool: - - # check if status is enriched or stale - QUERY = f""" - SELECT kg_enrichment_status FROM {self._get_table_name("collections")} WHERE collection_id = $1 - """ - status = ( - await self.connection_manager.fetchrow_query( - QUERY, [collection_id] - ) - )["kg_enrichment_status"] - if status == KGEnrichmentStatus.PENDING: - return False - - # check the number of entities in the cache. 
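Editor's note: the clustering path shown above (`_create_graph_and_cluster` feeding `_compute_leiden_communities`) boils down to building an undirected NetworkX graph from triples and handing it to graspologic's hierarchical Leiden implementation. A self-contained sketch, assuming `networkx` and `graspologic` are installed and using made-up triples:

import networkx as nx
from graspologic.partition import hierarchical_leiden

# Illustrative (subject, object, weight) triples; the handler reads these from chunk_triple.
triples = [
    ("OpenAI", "Stripe", 1.0),
    ("Stripe", "Airbnb", 0.5),
    ("Airbnb", "OpenAI", 0.8),
]

G = nx.Graph()
for subject, obj, weight in triples:
    G.add_edge(subject, obj, weight=weight)

# random_seed pins the partition, mirroring the default seed set in _compute_leiden_communities.
communities = hierarchical_leiden(G, max_cluster_size=1000, random_seed=7272)

for item in communities:
    # Each result carries node, cluster, parent_cluster, level and is_final_cluster,
    # which is what the CommunityInfo rows above are built from.
    print(item.node, item.cluster, item.level, item.is_final_cluster)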
- QUERY = f""" - SELECT COUNT(distinct node) FROM {self._get_table_name("community_info")} WHERE collection_id = $1 - """ - num_entities = ( - await self.connection_manager.fetchrow_query( - QUERY, [collection_id] - ) - )["count"] - - # a hard threshold of 80% of the entities in the cache. - if num_entities > 0.8 * len(triple_ids_cache): - return True - else: - return False - - async def _get_triple_ids_cache( - self, triples: list[Triple] - ) -> dict[str, list[int]]: - - # caching the triple ids - triple_ids_cache = dict[str, list[int]]() - for triple in triples: - if ( - triple.subject not in triple_ids_cache - and triple.subject is not None - ): - triple_ids_cache[triple.subject] = [] - if ( - triple.object not in triple_ids_cache - and triple.object is not None - ): - triple_ids_cache[triple.object] = [] - if triple.subject is not None and triple.id is not None: - triple_ids_cache[triple.subject].append(triple.id) - if triple.object is not None and triple.id is not None: - triple_ids_cache[triple.object].append(triple.id) - - return triple_ids_cache - - async def _incremental_clustering( - self, - triple_ids_cache: dict[str, list[int]], - leiden_params: dict[str, Any], - collection_id: UUID, - ) -> int: - """ - Performs incremental clustering on new triples by: - 1. Getting all triples and new triples - 2. Getting community mapping for all existing triples - 3. For each new triple: - - Check if subject/object exists in community mapping - - If exists, add its cluster to updated communities set - - If not, append triple to new_triple_ids list for clustering - 4. Run hierarchical clustering on new_triple_ids list - 5. Update community info table with new clusters, offsetting IDs by max_cluster_id - """ - - QUERY = f""" - SELECT node, cluster, is_final_cluster FROM {self._get_table_name("community_info")} WHERE collection_id = $1 - """ - - communities = await self.connection_manager.fetch_query( - QUERY, [collection_id] - ) - max_cluster_id = max( - [community["cluster"] for community in communities] - ) - - # TODO: modify above query to get a dict grouped by node (without aggregation) - communities_dict = {} # type: ignore - for community in communities: - if community["node"] not in communities_dict: - communities_dict[community["node"]] = [] - communities_dict[community["node"]].append(community) - - QUERY = f""" - SELECT document_id FROM {self._get_table_name("document_info")} WHERE $1 = ANY(collection_ids) and kg_extraction_status = $2 - """ - - new_document_ids = await self.connection_manager.fetch_query( - QUERY, [collection_id, KGExtractionStatus.SUCCESS] - ) - - new_triple_ids = await self.get_all_triples( - collection_id, new_document_ids - ) - - # community mapping for new triples - updated_communities = set() - new_triples = [] - for triple in new_triple_ids: - # bias towards subject - if triple.subject in communities_dict: - for community in communities_dict[triple.subject]: - updated_communities.add(community["cluster"]) - elif triple.object in communities_dict: - for community in communities_dict[triple.object]: - updated_communities.add(community["cluster"]) - else: - new_triples.append(triple) - - # delete the communities information for the updated communities - QUERY = f""" - DELETE FROM {self._get_table_name("community_report")} WHERE collection_id = $1 AND community_number = ANY($2) - """ - await self.connection_manager.execute_query( - QUERY, [collection_id, updated_communities] - ) - - hierarchical_communities_output = await self._create_graph_and_cluster( - 
new_triples, leiden_params - ) - - community_info = [] - for community in hierarchical_communities_output: - community_info.append( - CommunityInfo( - node=community.node, - cluster=community.cluster + max_cluster_id, - parent_cluster=( - community.parent_cluster + max_cluster_id - if community.parent_cluster is not None - else None - ), - level=community.level, - triple_ids=[], # FIXME: need to get the triple ids for the community - is_final_cluster=community.is_final_cluster, - collection_id=collection_id, - ) - ) - - await self.add_community_info(community_info) - num_communities = max([item.cluster for item in community_info]) + 1 - return num_communities - - async def perform_graph_clustering( - self, - collection_id: UUID, - leiden_params: dict[str, Any], - ) -> int: - """ - Leiden clustering algorithm to cluster the knowledge graph triples into communities. - - Available parameters and defaults: - max_cluster_size: int = 1000, - starting_communities: Optional[dict[str, int]] = None, - extra_forced_iterations: int = 0, - resolution: Union[int, float] = 1.0, - randomness: Union[int, float] = 0.001, - use_modularity: bool = True, - random_seed: Optional[int] = None, - weight_attribute: str = "weight", - is_weighted: Optional[bool] = None, - weight_default: Union[int, float] = 1.0, - check_directed: bool = True, - """ - - start_time = time.time() - - triples = await self.get_all_triples(collection_id) - - logger.info(f"Clustering with settings: {leiden_params}") - - triple_ids_cache = await self._get_triple_ids_cache(triples) - - if await self._use_community_cache(collection_id, triple_ids_cache): - num_communities = await self._incremental_clustering( - triple_ids_cache, leiden_params, collection_id - ) - else: - num_communities = await self._cluster_and_add_community_info( - triples, triple_ids_cache, leiden_params, collection_id - ) - - return num_communities - - async def _compute_leiden_communities( - self, - graph: Any, - leiden_params: dict[str, Any], - ) -> Any: - """Compute Leiden communities.""" - try: - from graspologic.partition import hierarchical_leiden - - if "random_seed" not in leiden_params: - leiden_params["random_seed"] = ( - 7272 # add seed to control randomness - ) - - start_time = time.time() - logger.info( - f"Running Leiden clustering with params: {leiden_params}" - ) - - community_mapping = hierarchical_leiden(graph, **leiden_params) - - logger.info( - f"Leiden clustering completed in {time.time() - start_time:.2f} seconds." 
- ) - return community_mapping - - except ImportError as e: - raise ImportError("Please install the graspologic package.") from e - - async def get_community_details( - self, community_number: int, collection_id: UUID - ) -> Tuple[int, list[Entity], list[Triple]]: - - QUERY = f""" - SELECT level FROM {self._get_table_name("community_info")} WHERE cluster = $1 AND collection_id = $2 - LIMIT 1 - """ - level = ( - await self.connection_manager.fetch_query( - QUERY, [community_number, collection_id] - ) - )[0]["level"] - - # selecting table name based on entity level - # check if there are any entities in the community that are not in the entity_embedding table - query = f""" - SELECT COUNT(*) FROM {self._get_table_name("collection_entity")} WHERE collection_id = $1 - """ - entity_count = ( - await self.connection_manager.fetch_query(query, [collection_id]) - )[0]["count"] - table_name = ( - "collection_entity" if entity_count > 0 else "document_entity" - ) - - QUERY = f""" - WITH node_triple_ids AS ( - SELECT node, triple_ids - FROM {self._get_table_name("community_info")} - WHERE cluster = $1 AND collection_id = $2 - ) - SELECT DISTINCT - e.id AS id, - e.name AS name, - e.description AS description - FROM node_triple_ids nti - JOIN {self._get_table_name(table_name)} e ON e.name = nti.node; - """ - entities = await self.connection_manager.fetch_query( - QUERY, [community_number, collection_id] - ) - entities = [Entity(**entity) for entity in entities] - - QUERY = f""" - WITH node_triple_ids AS ( - SELECT node, triple_ids - FROM {self._get_table_name("community_info")} - WHERE cluster = $1 and collection_id = $2 - ) - SELECT DISTINCT - t.id, t.subject, t.predicate, t.object, t.weight, t.description - FROM node_triple_ids nti - JOIN {self._get_table_name("chunk_triple")} t ON t.id = ANY(nti.triple_ids); - """ - triples = await self.connection_manager.fetch_query( - QUERY, [community_number, collection_id] - ) - triples = [Triple(**triple) for triple in triples] - - return level, entities, triples - - # async def client(self): - # return None - - async def get_community_reports( - self, collection_id: UUID - ) -> list[CommunityReport]: - QUERY = f""" - SELECT *c FROM {self._get_table_name("community_report")} WHERE collection_id = $1 - """ - return await self.connection_manager.fetch_query( - QUERY, [collection_id] - ) - - async def check_community_reports_exist( - self, collection_id: UUID, offset: int, limit: int - ) -> list[int]: - QUERY = f""" - SELECT distinct community_number FROM {self._get_table_name("community_report")} WHERE collection_id = $1 AND community_number >= $2 AND community_number < $3 - """ - community_numbers = await self.connection_manager.fetch_query( - QUERY, [collection_id, offset, offset + limit] - ) - return [item["community_number"] for item in community_numbers] - - async def delete_graph_for_collection( - self, collection_id: UUID, cascade: bool = False - ) -> None: - - # don't delete if status is PROCESSING. - QUERY = f""" - SELECT kg_enrichment_status FROM {self._get_table_name("collections")} WHERE collection_id = $1 - """ - status = ( - await self.connection_manager.fetch_query(QUERY, [collection_id]) - )[0]["kg_enrichment_status"] - if status == KGExtractionStatus.PROCESSING.value: - return - - # remove all triples for these documents. 
- DELETE_QUERIES = [ - f"DELETE FROM {self._get_table_name('community_info')} WHERE collection_id = $1;", - f"DELETE FROM {self._get_table_name('community_report')} WHERE collection_id = $1;", - ] - - document_ids_response = ( - await self.collection_handler.documents_in_collection( - collection_id - ) - ) - - # This type ignore is due to insufficient typing of the documents_in_collection method - document_ids = [doc.id for doc in document_ids_response["results"]] # type: ignore - - # TODO: make these queries more efficient. Pass the document_ids as params. - if cascade: - DELETE_QUERIES += [ - f"DELETE FROM {self._get_table_name('chunk_entity')} WHERE document_id = ANY($1::uuid[]);", - f"DELETE FROM {self._get_table_name('chunk_triple')} WHERE document_id = ANY($1::uuid[]);", - f"DELETE FROM {self._get_table_name('document_entity')} WHERE document_id = ANY($1::uuid[]);", - f"DELETE FROM {self._get_table_name('collection_entity')} WHERE collection_id = $1;", - ] - - # setting the kg_creation_status to PENDING for this collection. - QUERY = f""" - UPDATE {self._get_table_name("document_info")} SET kg_extraction_status = $1 WHERE $2::uuid = ANY(collection_ids) - """ - await self.connection_manager.execute_query( - QUERY, [KGExtractionStatus.PENDING, collection_id] - ) - - for query in DELETE_QUERIES: - if "community" in query or "collection_entity" in query: - await self.connection_manager.execute_query( - query, [collection_id] - ) - else: - await self.connection_manager.execute_query( - query, [document_ids] - ) - - # set status to PENDING for this collection. - QUERY = f""" - UPDATE {self._get_table_name("collections")} SET kg_enrichment_status = $1 WHERE collection_id = $2 - """ - await self.connection_manager.execute_query( - QUERY, [KGExtractionStatus.PENDING, collection_id] - ) - - async def delete_node_via_document_id( - self, document_id: UUID, collection_id: UUID - ) -> None: - # don't delete if status is PROCESSING. - QUERY = f""" - SELECT kg_enrichment_status FROM {self._get_table_name("collections")} WHERE collection_id = $1 - """ - status = ( - await self.connection_manager.fetch_query(QUERY, [collection_id]) - )[0]["kg_enrichment_status"] - if status == KGExtractionStatus.PROCESSING.value: - return - - # Execute separate DELETE queries - delete_queries = [ - f"DELETE FROM {self._get_table_name('chunk_entity')} WHERE document_id = $1", - f"DELETE FROM {self._get_table_name('chunk_triple')} WHERE document_id = $1", - f"DELETE FROM {self._get_table_name('document_entity')} WHERE document_id = $1", - ] - - for query in delete_queries: - await self.connection_manager.execute_query(query, [document_id]) - - # Check if this is the last document in the collection - documents = await self.collection_handler.documents_in_collection( - collection_id - ) - count = documents["total_entries"] - - if count == 0: - # If it's the last document, delete collection-related data - collection_queries = [ - f"DELETE FROM {self._get_table_name('community_info')} WHERE collection_id = $1", - f"DELETE FROM {self._get_table_name('community_report')} WHERE collection_id = $1", - ] - for query in collection_queries: - await self.connection_manager.execute_query( - query, [collection_id] - ) # Ensure collection_id is in a list - - # set status to PENDING for this collection. 
- QUERY = f""" - UPDATE {self._get_table_name("collections")} SET kg_enrichment_status = $1 WHERE collection_id = $2 - """ - await self.connection_manager.execute_query( - QUERY, [KGExtractionStatus.PENDING, collection_id] - ) - return None - return None - - def _get_str_estimation_output(self, x: tuple[Any, Any]) -> str: - if isinstance(x[0], int) and isinstance(x[1], int): - return " - ".join(map(str, x)) - else: - return " - ".join(f"{round(a, 2)}" for a in x) - - async def get_existing_entity_extraction_ids( - self, document_id: UUID - ) -> list[str]: - QUERY = f""" - SELECT DISTINCT unnest(extraction_ids) AS extraction_id FROM {self._get_table_name("chunk_entity")} WHERE document_id = $1 - """ - return [ - item["extraction_id"] - for item in await self.connection_manager.fetch_query( - QUERY, [document_id] - ) - ] - - async def get_creation_estimate( - self, collection_id: UUID, kg_creation_settings: KGCreationSettings - ) -> KGCreationEstimationResponse: - - # todo: harmonize the document_id and id fields: postgres table contains document_id, but other places use id. - document_ids = [ - doc.id - for doc in ( - await self.collection_handler.documents_in_collection(collection_id) # type: ignore - )["results"] - ] - - query = f""" - SELECT document_id, COUNT(*) as chunk_count - FROM {self._get_table_name("vectors")} - WHERE document_id = ANY($1) - GROUP BY document_id - """ - - chunk_counts = await self.connection_manager.fetch_query( - query, [document_ids] - ) - - total_chunks = ( - sum(doc["chunk_count"] for doc in chunk_counts) - // kg_creation_settings.extraction_merge_count - ) # 4 chunks per llm - estimated_entities = ( - total_chunks * 10, - total_chunks * 20, - ) # 25 entities per 4 chunks - estimated_triples = ( - int(estimated_entities[0] * 1.25), - int(estimated_entities[1] * 1.5), - ) # Assuming 1.25 triples per entity on average - - estimated_llm_calls = ( - total_chunks * 2 + estimated_entities[0], - total_chunks * 2 + estimated_entities[1], - ) - - total_in_out_tokens = ( - 2000 * estimated_llm_calls[0] // 1000000, - 2000 * estimated_llm_calls[1] // 1000000, - ) # in millions - - estimated_cost = ( - total_in_out_tokens[0] - * llm_cost_per_million_tokens( - kg_creation_settings.generation_config.model - ), - total_in_out_tokens[1] - * llm_cost_per_million_tokens( - kg_creation_settings.generation_config.model - ), - ) - - total_time_in_minutes = ( - total_in_out_tokens[0] * 10 / 60, - total_in_out_tokens[1] * 10 / 60, - ) # 10 minutes per million tokens - - return KGCreationEstimationResponse( - message='Ran Graph Creation Estimate (not the actual run). Note that these are estimated ranges, actual values may vary. To run the KG creation process, run `create-graph` with `--run` in the cli, or `run_type="run"` in the client.', - document_count=len(document_ids), - number_of_jobs_created=len(document_ids) + 1, - total_chunks=total_chunks, - estimated_entities=self._get_str_estimation_output( - estimated_entities - ), - estimated_triples=self._get_str_estimation_output( - estimated_triples - ), - estimated_llm_calls=self._get_str_estimation_output( - estimated_llm_calls - ), - estimated_total_in_out_tokens_in_millions=self._get_str_estimation_output( - total_in_out_tokens - ), - estimated_cost_in_usd=self._get_str_estimation_output( - estimated_cost - ), - estimated_total_time_in_minutes="Depends on your API key tier. Accurate estimate coming soon. 
Rough estimate: " - + self._get_str_estimation_output(total_time_in_minutes), - ) - - async def get_enrichment_estimate( - self, collection_id: UUID, kg_enrichment_settings: KGEnrichmentSettings - ) -> KGEnrichmentEstimationResponse: - - document_ids = [ - doc.id - for doc in ( - await self.collection_handler.documents_in_collection(collection_id) # type: ignore - )["results"] - ] - - QUERY = f""" - SELECT COUNT(*) FROM {self._get_table_name("document_entity")} WHERE document_id = ANY($1); - """ - entity_count = ( - await self.connection_manager.fetch_query(QUERY, [document_ids]) - )[0]["count"] - - if not entity_count: - raise ValueError( - "No entities found in the graph. Please run `create-graph` first." - ) - - QUERY = f""" - SELECT COUNT(*) FROM {self._get_table_name("chunk_triple")} WHERE document_id = ANY($1); - """ - triple_count = ( - await self.connection_manager.fetch_query(QUERY, [document_ids]) - )[0]["count"] - - estimated_llm_calls = (entity_count // 10, entity_count // 5) - estimated_total_in_out_tokens_in_millions = ( - 2000 * estimated_llm_calls[0] / 1000000, - 2000 * estimated_llm_calls[1] / 1000000, - ) - cost_per_million_tokens = llm_cost_per_million_tokens( - kg_enrichment_settings.generation_config.model - ) - estimated_cost = ( - estimated_total_in_out_tokens_in_millions[0] - * cost_per_million_tokens, - estimated_total_in_out_tokens_in_millions[1] - * cost_per_million_tokens, - ) - - estimated_total_time = ( - estimated_total_in_out_tokens_in_millions[0] * 10 / 60, - estimated_total_in_out_tokens_in_millions[1] * 10 / 60, - ) - - return KGEnrichmentEstimationResponse( - message='Ran Graph Enrichment Estimate (not the actual run). Note that these are estimated ranges, actual values may vary. To run the KG enrichment process, run `enrich-graph` with `--run` in the cli, or `run_type="run"` in the client.', - total_entities=entity_count, - total_triples=triple_count, - estimated_llm_calls=self._get_str_estimation_output( - estimated_llm_calls - ), - estimated_total_in_out_tokens_in_millions=self._get_str_estimation_output( - estimated_total_in_out_tokens_in_millions - ), - estimated_cost_in_usd=self._get_str_estimation_output( - estimated_cost - ), - estimated_total_time_in_minutes="Depends on your API key tier. Accurate estimate coming soon. Rough estimate: " - + self._get_str_estimation_output(estimated_total_time), - ) - - async def create_vector_index(self): - # need to implement this. Just call vector db provider's create_vector_index method. - # this needs to be run periodically for every collection. - raise NotImplementedError - - async def delete_triples(self, triple_ids: list[int]): - # need to implement this. - raise NotImplementedError - - async def get_schema(self): - # somehow get the rds from the postgres db. 
- raise NotImplementedError - - async def get_entities( - self, - collection_id: Optional[UUID] = None, - entity_ids: Optional[list[str]] = None, - entity_names: Optional[list[str]] = None, - entity_table_name: str = "document_entity", - extra_columns: Optional[list[str]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict: - conditions = [] - params: list = [collection_id] - param_index = 2 - - if entity_ids: - conditions.append(f"id = ANY(${param_index})") - params.append(entity_ids) - param_index += 1 - - if entity_names: - conditions.append(f"name = ANY(${param_index})") - params.append(entity_names) - param_index += 1 - - pagination_params = [] - if offset: - pagination_params.append(f"OFFSET ${param_index}") - params.append(offset) - param_index += 1 - - if limit != -1: - pagination_params.append(f"LIMIT ${param_index}") - params.append(limit) - param_index += 1 - - pagination_clause = " ".join(pagination_params) - - if entity_table_name == "collection_entity": - query = f""" - SELECT id, name, description, extraction_ids, document_ids {", " + ", ".join(extra_columns) if extra_columns else ""} - FROM {self._get_table_name(entity_table_name)} - WHERE collection_id = $1 - {" AND " + " AND ".join(conditions) if conditions else ""} - ORDER BY id - {pagination_clause} - """ - else: - query = f""" - SELECT id, name, description, extraction_ids, document_id {", " + ", ".join(extra_columns) if extra_columns else ""} - FROM {self._get_table_name(entity_table_name)} - WHERE document_id = ANY( - SELECT document_id FROM {self._get_table_name("document_info")} - WHERE $1 = ANY(collection_ids) - ) - {" AND " + " AND ".join(conditions) if conditions else ""} - ORDER BY id - {pagination_clause} - """ - - results = await self.connection_manager.fetch_query(query, params) - entities = [Entity(**entity) for entity in results] - - total_entries = await self.get_entity_count( - collection_id=collection_id, entity_table_name=entity_table_name - ) - - return {"entities": entities, "total_entries": total_entries} - - async def get_triples( - self, - collection_id: Optional[UUID] = None, - entity_names: Optional[list[str]] = None, - triple_ids: Optional[list[str]] = None, - offset: Optional[int] = 0, - limit: Optional[int] = -1, - ) -> dict: - conditions = [] - params: list = [str(collection_id)] - param_index = 2 - - if triple_ids: - conditions.append(f"id = ANY(${param_index})") - params.append(triple_ids) - param_index += 1 - - if entity_names: - conditions.append( - f"subject = ANY(${param_index}) or object = ANY(${param_index})" - ) - params.append(entity_names) - param_index += 1 - - pagination_params = [] - if offset: - pagination_params.append(f"OFFSET ${param_index}") - params.append(offset) - param_index += 1 - - if limit != -1: - pagination_params.append(f"LIMIT ${param_index}") - params.append(limit) - param_index += 1 - - pagination_clause = " ".join(pagination_params) - - query = f""" - SELECT id, subject, predicate, object, description - FROM {self._get_table_name("chunk_triple")} - WHERE document_id = ANY( - SELECT document_id FROM {self._get_table_name("document_info")} - WHERE $1 = ANY(collection_ids) - ) - {" AND " + " AND ".join(conditions) if conditions else ""} - ORDER BY id - {pagination_clause} - """ - - triples = await self.connection_manager.fetch_query(query, params) - triples = [Triple(**triple) for triple in triples] - total_entries = await self.get_triple_count( - collection_id=collection_id - ) - - return {"triples": triples, "total_entries": total_entries} - - async def 
structured_query(self): - raise NotImplementedError - - async def update_extraction_prompt(self): - raise NotImplementedError - - async def update_kg_search_prompt(self): - raise NotImplementedError - - async def upsert_triples(self): - raise NotImplementedError - - async def get_entity_count( - self, - collection_id: Optional[UUID] = None, - document_id: Optional[UUID] = None, - distinct: bool = False, - entity_table_name: str = "document_entity", - ) -> int: - if collection_id is None and document_id is None: - raise ValueError( - "Either collection_id or document_id must be provided." - ) - - conditions = [] - params = [] - - if entity_table_name == "collection_entity": - if document_id: - raise ValueError( - "document_id is not supported for collection_entity table" - ) - conditions.append("collection_id = $1") - params.append(str(collection_id)) - elif collection_id: - conditions.append( - f""" - document_id = ANY( - SELECT document_id FROM {self._get_table_name("document_info")} - WHERE $1 = ANY(collection_ids) - ) - """ - ) - params.append(str(collection_id)) - else: - conditions.append("document_id = $1") - params.append(str(document_id)) - - count_value = "DISTINCT name" if distinct else "*" - - QUERY = f""" - SELECT COUNT({count_value}) FROM {self._get_table_name(entity_table_name)} - WHERE {" AND ".join(conditions)} - """ - return (await self.connection_manager.fetch_query(QUERY, params))[0][ - "count" - ] - - async def get_triple_count( - self, - collection_id: Optional[UUID] = None, - document_id: Optional[UUID] = None, - ) -> int: - if collection_id is None and document_id is None: - raise ValueError( - "Either collection_id or document_id must be provided." - ) - - conditions = [] - params = [] - - if collection_id: - conditions.append( - f""" - document_id = ANY( - SELECT document_id FROM {self._get_table_name("document_info")} - WHERE $1 = ANY(collection_ids) - ) - """ - ) - params.append(str(collection_id)) - else: - conditions.append("document_id = $1") - params.append(str(document_id)) - - QUERY = f""" - SELECT COUNT(*) FROM {self._get_table_name("chunk_triple")} - WHERE {" AND ".join(conditions)} - """ - return (await self.connection_manager.fetch_query(QUERY, params))[0][ - "count" - ] - - async def update_entity_descriptions(self, entities: list[Entity]): - - query = f""" - UPDATE {self._get_table_name("collection_entity")} - SET description = $3, description_embedding = $4 - WHERE name = $1 AND collection_id = $2 - """ - - inputs = [ - ( - entity.name, - entity.collection_id, - entity.description, - entity.description_embedding, - ) - for entity in entities - ] - - await self.connection_manager.execute_many(query, inputs) # type: ignore - - async def get_deduplication_estimate( - self, - collection_id: UUID, - kg_deduplication_settings: KGEntityDeduplicationSettings, - ): - try: - # number of documents in collection - query = f""" - SELECT name, count(name) - FROM {self._get_table_name("document_entity")} - WHERE document_id = ANY( - SELECT document_id FROM {self._get_table_name("document_info")} - WHERE $1 = ANY(collection_ids) - ) - GROUP BY name - HAVING count(name) >= 5 - """ - entities = await self.connection_manager.fetch_query( - query, [collection_id] - ) - num_entities = len(entities) - - estimated_llm_calls = (num_entities, num_entities) - estimated_total_in_out_tokens_in_millions = ( - estimated_llm_calls[0] * 1000 / 1000000, - estimated_llm_calls[1] * 5000 / 1000000, - ) - estimated_cost_in_usd = ( - estimated_total_in_out_tokens_in_millions[0] - * 
llm_cost_per_million_tokens( - kg_deduplication_settings.generation_config.model - ), - estimated_total_in_out_tokens_in_millions[1] - * llm_cost_per_million_tokens( - kg_deduplication_settings.generation_config.model - ), - ) - - estimated_total_time_in_minutes = ( - estimated_total_in_out_tokens_in_millions[0] * 10 / 60, - estimated_total_in_out_tokens_in_millions[1] * 10 / 60, - ) - - return KGDeduplicationEstimationResponse( - message='Ran Deduplication Estimate (not the actual run). Note that these are estimated ranges, actual values may vary. To run the Deduplication process, run `deduplicate-entities` with `--run` in the cli, or `run_type="run"` in the client.', - num_entities=num_entities, - estimated_llm_calls=self._get_str_estimation_output( - estimated_llm_calls - ), - estimated_total_in_out_tokens_in_millions=self._get_str_estimation_output( - estimated_total_in_out_tokens_in_millions - ), - estimated_cost_in_usd=self._get_str_estimation_output( - estimated_cost_in_usd - ), - estimated_total_time_in_minutes=self._get_str_estimation_output( - estimated_total_time_in_minutes - ), - ) - except UndefinedTableError as e: - logger.error( - f"Entity embedding table not found. Please run `create-graph` first. {str(e)}" - ) - raise R2RException( - message="Entity embedding table not found. Please run `create-graph` first.", - status_code=404, - ) - except PostgresError as e: - logger.error( - f"Database error in get_deduplication_estimate: {str(e)}" - ) - raise HTTPException( - status_code=500, - detail="An error occurred while fetching the deduplication estimate.", - ) - except Exception as e: - logger.error( - f"Unexpected error in get_deduplication_estimate: {str(e)}" - ) - raise HTTPException( - status_code=500, - detail="An unexpected error occurred while fetching the deduplication estimate.", - ) diff --git a/py/core/providers/database/logging.py b/py/core/providers/database/logging.py index fd1742fc6..1a2bd1b31 100644 --- a/py/core/providers/database/logging.py +++ b/py/core/providers/database/logging.py @@ -1,7 +1,6 @@ import json import os -from datetime import datetime -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import Any, Optional, Tuple, Union from uuid import UUID from core.base import LoggingHandler, Message @@ -120,8 +119,8 @@ async def info_log( ) async def get_logs( - self, run_ids: List[UUID], limit_per_run: int = 10 - ) -> List[Dict[str, Any]]: + self, run_ids: list[UUID], limit_per_run: int = 10 + ) -> list[dict[str, Any]]: """Retrieve logs for specified run IDs.""" if not run_ids: raise ValueError("No run ids provided") @@ -149,14 +148,14 @@ async def get_logs( async def get_info_logs( self, - offset: int = 0, - limit: int = 100, + offset: int, + limit: int, run_type_filter: Optional[RunType] = None, - user_ids: Optional[List[UUID]] = None, - ) -> List[RunInfoLog]: + user_ids: Optional[list[UUID]] = None, + ) -> list[RunInfoLog]: """Retrieve run information logs with filtering options.""" conditions = [] - params: List[Any] = [] + params: list[Any] = [] query = f""" SELECT run_id, run_type, timestamp, user_id @@ -208,12 +207,12 @@ async def delete_conversation(self, conversation_id: str) -> None: query, [UUID(conversation_id)] ) - async def get_conversations_overview( + async def get_conversations( self, - conversation_ids: Optional[List[UUID]] = None, - offset: int = 0, - limit: int = -1, - ) -> Dict[str, Union[List[Dict], int]]: + offset: int, + limit: int, + conversation_ids: Optional[list[UUID]] = None, + ) -> dict[str, 
Union[list[dict], int]]: """Get an overview of conversations with pagination.""" query = """ WITH conversation_overview AS ( @@ -258,7 +257,7 @@ async def add_message( conversation_id: str, content: Message, parent_id: Optional[str] = None, - metadata: Optional[Dict] = None, + metadata: Optional[dict] = None, ) -> str: """Add a message to a conversation.""" message_id = UUID(bytes=os.urandom(16)) @@ -337,7 +336,7 @@ async def transaction(): async def get_conversation( self, conversation_id: str, branch_id: Optional[str] = None - ) -> List[Tuple[str, Message]]: + ) -> list[Tuple[str, Message]]: """Retrieve all messages in a conversation branch.""" if not branch_id: # Get the most recent branch @@ -381,7 +380,7 @@ async def get_conversation( for row in rows ] - async def get_branches_overview(self, conversation_id: str) -> List[Dict]: + async def get_branches(self, conversation_id: str) -> list[dict]: """Get an overview of all branches in a conversation.""" query = """ SELECT b.id, b.branch_point_id, m.content, b.created_at diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index f5a524ef1..ad9424905 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -6,7 +6,6 @@ from core.base import ( DatabaseConfig, - DatabaseConnectionManager, DatabaseProvider, PostgresConfigurationSettings, VectorQuantizationType, @@ -16,12 +15,12 @@ from core.providers.database.collection import PostgresCollectionHandler from core.providers.database.document import PostgresDocumentHandler from core.providers.database.file import PostgresFileHandler -from core.providers.database.kg import PostgresKGHandler +from core.providers.database.graph import PostgresGraphHandler from core.providers.database.logging import PostgresLoggingHandler from core.providers.database.prompt import PostgresPromptHandler from core.providers.database.tokens import PostgresTokenHandler from core.providers.database.user import PostgresUserHandler -from core.providers.database.vector import PostgresVectorHandler +from core.providers.database.vector import PostgresChunkHandler from .base import SemaphoreConnectionPool @@ -59,11 +58,11 @@ class PostgresDBProvider(DatabaseProvider): connection_manager: PostgresConnectionManager document_handler: PostgresDocumentHandler - collection_handler: PostgresCollectionHandler + collections_handler: PostgresCollectionHandler token_handler: PostgresTokenHandler user_handler: PostgresUserHandler - vector_handler: PostgresVectorHandler - kg_handler: PostgresKGHandler + vector_handler: PostgresChunkHandler + graph_handler: PostgresGraphHandler prompt_handler: PostgresPromptHandler file_handler: PostgresFileHandler logging_handler: PostgresLoggingHandler @@ -142,26 +141,28 @@ def __init__( self.token_handler = PostgresTokenHandler( self.project_name, self.connection_manager ) - self.collection_handler = PostgresCollectionHandler( + self.collections_handler = PostgresCollectionHandler( self.project_name, self.connection_manager, self.config ) self.user_handler = PostgresUserHandler( self.project_name, self.connection_manager, self.crypto_provider ) - self.vector_handler = PostgresVectorHandler( + self.vector_handler = PostgresChunkHandler( self.project_name, self.connection_manager, self.dimension, self.quantization_type, self.enable_fts, ) - self.kg_handler = PostgresKGHandler( - self.project_name, - self.connection_manager, - self.collection_handler, - self.dimension, - self.quantization_type, + + self.graph_handler = 
PostgresGraphHandler( + project_name=self.project_name, + connection_manager=self.connection_manager, + collections_handler=self.collections_handler, + dimension=self.dimension, + quantization_type=self.quantization_type, ) + self.prompt_handler = PostgresPromptHandler( self.project_name, self.connection_manager ) @@ -192,13 +193,13 @@ async def initialize(self): ) await self.document_handler.create_tables() - await self.collection_handler.create_tables() + await self.collections_handler.create_tables() await self.token_handler.create_tables() await self.user_handler.create_tables() await self.vector_handler.create_tables() await self.prompt_handler.create_tables() await self.file_handler.create_tables() - await self.kg_handler.create_tables() + await self.graph_handler.create_tables() await self.logging_handler.create_tables() def _get_postgres_configuration_settings( diff --git a/py/core/providers/database/prompt.py b/py/core/providers/database/prompt.py index cbb8ff56b..1a537debd 100644 --- a/py/core/providers/database/prompt.py +++ b/py/core/providers/database/prompt.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from datetime import datetime, timedelta from pathlib import Path -from typing import Any, Generic, Optional, TypeVar, Union +from typing import Any, Generic, Optional, TypeVar import yaml @@ -133,7 +133,7 @@ def _cache_key( return f"{prompt_name}:{sorted_inputs}" return prompt_name - async def get_prompt( + async def get_cached_prompt( self, prompt_name: str, inputs: Optional[dict[str, Any]] = None, @@ -142,6 +142,11 @@ async def get_prompt( ) -> str: """Get a prompt with caching support""" if prompt_override: + if inputs: + try: + return prompt_override.format(**inputs) + except KeyError: + return prompt_override return prompt_override cache_key = self._cache_key(prompt_name, inputs) @@ -156,6 +161,35 @@ async def get_prompt( self._prompt_cache.set(cache_key, result) return result + async def get_prompt( # type: ignore + self, + name: str, + inputs: Optional[dict] = None, + prompt_override: Optional[str] = None, + ) -> dict: + query = f""" + SELECT id, name, template, input_types, created_at, updated_at + FROM {self._get_table_name("prompts")} + WHERE name = $1; + """ + result = await self.connection_manager.fetchrow_query(query, [name]) + + if not result: + raise ValueError(f"Prompt template '{name}' not found") + + input_types = result["input_types"] + if isinstance(input_types, str): + input_types = json.loads(input_types) + + return { + "id": result["id"], + "name": result["name"], + "template": result["template"], + "input_types": input_types, + "created_at": result["created_at"], + "updated_at": result["updated_at"], + } + @abstractmethod async def _get_prompt_impl( self, prompt_name: str, inputs: Optional[dict[str, Any]] = None @@ -220,7 +254,7 @@ def __init__( ) self.connection_manager = connection_manager self.project_name = project_name - self.prompts: dict[str, dict[str, Union[str, dict[str, str]]]] = {} + self.prompts: dict[str, dict[str, str | dict[str, str]]] = {} async def _load_prompts(self) -> None: """Load prompts from both database and YAML files.""" @@ -233,7 +267,7 @@ async def _load_prompts(self) -> None: async def _load_prompts_from_database(self) -> None: """Load prompts from the database.""" query = f""" - SELECT prompt_id, name, template, input_types, created_at, updated_at + SELECT id, name, template, input_types, created_at, updated_at FROM {self._get_table_name("prompts")}; """ try: @@ -247,7 +281,7 @@ async def 
_load_prompts_from_database(self) -> None: input_types = json.loads(input_types) self.prompts[row["name"]] = { - "prompt_id": row["prompt_id"], + "id": row["id"], "template": row["template"], "input_types": input_types, "created_at": row["created_at"], @@ -257,7 +291,7 @@ async def _load_prompts_from_database(self) -> None: self._template_cache.set( row["name"], { - "prompt_id": row["prompt_id"], + "id": row["id"], "template": row["template"], "input_types": input_types, }, @@ -405,7 +439,7 @@ async def _update_prompt_impl( UPDATE {self._get_table_name("prompts")} SET {', '.join(set_clauses)} WHERE name = $1 - RETURNING prompt_id, template, input_types; + RETURNING id, template, input_types; """ try: @@ -433,7 +467,7 @@ async def create_tables(self): """Create the necessary tables for storing prompts.""" query = f""" CREATE TABLE IF NOT EXISTS {self._get_table_name("prompts")} ( - prompt_id UUID PRIMARY KEY, + id UUID PRIMARY KEY, name VARCHAR(255) NOT NULL UNIQUE, template TEXT NOT NULL, input_types JSONB NOT NULL, @@ -471,7 +505,7 @@ async def add_prompt( if preserve_existing and name in self.prompts: return - prompt_id = generate_default_prompt_id(name) + id = generate_default_prompt_id(name) # Ensure input_types is properly serialized input_types_json = ( @@ -481,21 +515,21 @@ async def add_prompt( ) query = f""" - INSERT INTO {self._get_table_name("prompts")} (prompt_id, name, template, input_types) + INSERT INTO {self._get_table_name("prompts")} (id, name, template, input_types) VALUES ($1, $2, $3, $4) ON CONFLICT (name) DO UPDATE SET template = EXCLUDED.template, input_types = EXCLUDED.input_types, updated_at = CURRENT_TIMESTAMP - RETURNING prompt_id, created_at, updated_at; + RETURNING id, created_at, updated_at; """ result = await self.connection_manager.fetchrow_query( - query, [prompt_id, name, template, input_types_json] + query, [id, name, template, input_types_json] ) self.prompts[name] = { - "prompt_id": result["prompt_id"], + "id": result["id"], "template": template, "input_types": input_types, "created_at": result["created_at"], @@ -506,7 +540,7 @@ async def add_prompt( self._template_cache.set( name, { - "prompt_id": prompt_id, + "id": id, "template": template, "input_types": input_types, }, # Store as dict in cache @@ -520,15 +554,20 @@ async def add_prompt( async def get_all_prompts(self) -> dict[str, Any]: """Retrieve all stored prompts.""" query = f""" - SELECT prompt_id, name, template, input_types, created_at, updated_at + SELECT id, name, template, input_types, created_at, updated_at, COUNT(*) OVER() AS total_entries FROM {self._get_table_name("prompts")}; """ results = await self.connection_manager.fetch_query(query) - return { - row["name"]: { + if not results: + return {"results": [], "total_entries": 0} + + total_entries = results[0]["total_entries"] if results else 0 + + prompts = [ + { "name": row["name"], - "prompt_id": row["prompt_id"], + "id": row["id"], "template": row["template"], "input_types": ( json.loads(row["input_types"]) @@ -539,7 +578,9 @@ async def get_all_prompts(self) -> dict[str, Any]: "updated_at": row["updated_at"], } for row in results - } + ] + + return {"results": prompts, "total_entries": total_entries} async def delete_prompt(self, name: str) -> None: """Delete a prompt template.""" @@ -572,13 +613,13 @@ async def get_message_payload( if system_prompt_override: system_prompt = system_prompt_override else: - system_prompt = await self.get_prompt( + system_prompt = await self.get_cached_prompt( system_prompt_name or "default_system", 
system_inputs, prompt_override=system_prompt_override, ) - task_prompt = await self.get_prompt( + task_prompt = await self.get_cached_prompt( task_prompt_name or "default_rag", task_inputs, prompt_override=task_prompt_override, diff --git a/py/core/providers/database/prompts/graphrag_community_reports.yaml b/py/core/providers/database/prompts/graphrag_communities.yaml similarity index 98% rename from py/core/providers/database/prompts/graphrag_community_reports.yaml rename to py/core/providers/database/prompts/graphrag_communities.yaml index 7b7828f69..be68b3d6e 100644 --- a/py/core/providers/database/prompts/graphrag_community_reports.yaml +++ b/py/core/providers/database/prompts/graphrag_communities.yaml @@ -1,4 +1,4 @@ -graphrag_community_reports: +graphrag_communities: template: | You are an AI assistant that helps a human analyst to perform general information discovery. Information discovery is the process of identifying and assessing relevant information associated with certain entities (e.g., organizations and individuals) within a network. @@ -55,14 +55,14 @@ graphrag_community_reports: Entity: OpenAI descriptions: 101,OpenAI is an AI research and deployment company. - triples: + relationships: 201,OpenAI,Stripe,OpenAI partnered with Stripe to integrate payment solutions. 203,Airbnb,OpenAI,Airbnb utilizes OpenAI's AI tools for customer service. 204,Stripe,OpenAI,Stripe invested in OpenAI's latest funding round. Entity: Stripe descriptions: 102,Stripe is a technology company that builds economic infrastructure for the internet. - triples: + relationships: 201,OpenAI,Stripe,OpenAI partnered with Stripe to integrate payment solutions. 202,Stripe,Airbnb,Stripe provides payment processing services to Airbnb. 204,Stripe,OpenAI,Stripe invested in OpenAI's latest funding round. @@ -70,7 +70,7 @@ graphrag_community_reports: Entity: Airbnb descriptions: 103,Airbnb is an online marketplace for lodging and tourism experiences. - triples: + relationships: 203,Airbnb,OpenAI,Airbnb utilizes OpenAI's AI tools for customer service. 205,Airbnb,Stripe,Airbnb and Stripe collaborate on expanding global payment options. diff --git a/py/core/providers/database/prompts/graphrag_entity_description.yaml b/py/core/providers/database/prompts/graphrag_entity_description.yaml index ea0066a41..bfed919a0 100644 --- a/py/core/providers/database/prompts/graphrag_entity_description.yaml +++ b/py/core/providers/database/prompts/graphrag_entity_description.yaml @@ -1,15 +1,15 @@ graphrag_entity_description: template: | - Provide a comprehensive yet concise summary of the given entity, incorporating its description and associated triples: + Provide a comprehensive yet concise summary of the given entity, incorporating its description and associated relationships: Entity Info: {entity_info} - Triples: - {triples_txt} + Relationships: + {relationships_txt} Your summary should: 1. Clearly define the entity's core concept or purpose - 2. Highlight key relationships or attributes from the triples + 2. Highlight key relationships or attributes from the relationships 3. Integrate any relevant information from the existing description 4. Maintain a neutral, factual tone 5. Be approximately 2-3 sentences long @@ -17,4 +17,4 @@ graphrag_entity_description: Ensure the summary is coherent, informative, and captures the essence of the entity within the context of the provided information. 
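As a rough usage sketch (not part of the patch itself), the reworked prompt handler above could be exercised as follows; the `prompt_handler` variable (a PostgresPromptHandler) and the sample input values are assumptions for illustration only:

```python
# Hypothetical sketch: list stored prompts and render one of the renamed templates.
async def show_prompts(prompt_handler):
    overview = await prompt_handler.get_all_prompts()
    # get_all_prompts now returns {"results": [...], "total_entries": n}
    # instead of a dict keyed by prompt name.
    print(f"{overview['total_entries']} prompts stored")
    for prompt in overview["results"]:
        print(prompt["name"], prompt["id"])

    # get_cached_prompt replaces the old cached get_prompt; the renamed
    # graphrag_entity_description template now takes relationships_txt.
    return await prompt_handler.get_cached_prompt(
        "graphrag_entity_description",
        inputs={
            "entity_info": "OpenAI: AI research and deployment company",
            "relationships_txt": "201,OpenAI,Stripe,OpenAI partnered with Stripe",
        },
    )
```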
input_types: entity_info: str - triples_txt: str + relationships_txt: str diff --git a/py/core/providers/database/prompts/graphrag_triples_extraction_few_shot.yaml b/py/core/providers/database/prompts/graphrag_relationships_extraction_few_shot.yaml similarity index 98% rename from py/core/providers/database/prompts/graphrag_triples_extraction_few_shot.yaml rename to py/core/providers/database/prompts/graphrag_relationships_extraction_few_shot.yaml index 6bfb1bb26..867c0ac70 100644 --- a/py/core/providers/database/prompts/graphrag_triples_extraction_few_shot.yaml +++ b/py/core/providers/database/prompts/graphrag_relationships_extraction_few_shot.yaml @@ -1,8 +1,8 @@ -graphrag_triples_extraction_few_shot: +graphrag_relationships_extraction_few_shot: template: > -Goal- Given a text document, identify all entities and their entity types from the text and all relationships among the identified entities. - Given the text, extract up to {max_knowledge_triples} entity-relation triplets. + Given the text, extract up to {max_knowledge_relationships} entity-relation relationships. -Steps- 1. Identify all entities. For each identified entity, extract the following information: - entity_name: Name of the entity, capitalized @@ -117,7 +117,7 @@ graphrag_triples_extraction_few_shot: Output: input_types: - max_knowledge_triples: int + max_knowledge_relationships: int input: str entity_types: list[str] relation_types: list[str] diff --git a/py/core/providers/database/prompts/prompt_tuning.yaml b/py/core/providers/database/prompts/prompt_tuning.yaml index 7f2f3d687..a3924c684 100644 --- a/py/core/providers/database/prompts/prompt_tuning.yaml +++ b/py/core/providers/database/prompts/prompt_tuning.yaml @@ -22,4 +22,5 @@ prompt_tuning_task: Return only the new prompt template, maintaining the exact format required for the input types.
input_types: prompt_template: str + sample_data: str input_types: str diff --git a/py/core/providers/database/user.py b/py/core/providers/database/user.py index a6974a1d4..c84f986ed 100644 --- a/py/core/providers/database/user.py +++ b/py/core/providers/database/user.py @@ -1,13 +1,13 @@ from datetime import datetime -from typing import Optional, Union +from typing import Optional from uuid import UUID from fastapi import HTTPException from core.base import CryptoProvider, UserHandler -from core.base.abstractions import R2RException, UserStats -from core.base.api.models import UserResponse +from core.base.abstractions import R2RException from core.utils import generate_user_id +from shared.abstractions import User from .base import PostgresConnectionManager, QueryBuilder from .collection import PostgresCollectionHandler @@ -28,7 +28,7 @@ def __init__( async def create_tables(self): query = f""" CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresUserHandler.TABLE_NAME)} ( - user_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), email TEXT UNIQUE NOT NULL, hashed_password TEXT NOT NULL, is_superuser BOOLEAN DEFAULT FALSE, @@ -48,12 +48,12 @@ async def create_tables(self): """ await self.connection_manager.execute_query(query) - async def get_user_by_id(self, user_id: UUID) -> UserResponse: + async def get_user_by_id(self, id: UUID) -> User: query, _ = ( QueryBuilder(self._get_table_name("users")) .select( [ - "user_id", + "id", "email", "hashed_password", "is_superuser", @@ -67,16 +67,16 @@ async def get_user_by_id(self, user_id: UUID) -> UserResponse: "collection_ids", ] ) - .where("user_id = $1") + .where("id = $1") .build() ) - result = await self.connection_manager.fetchrow_query(query, [user_id]) + result = await self.connection_manager.fetchrow_query(query, [id]) if not result: raise R2RException(status_code=404, message="User not found") - return UserResponse( - id=result["user_id"], + return User( + id=result["id"], email=result["email"], hashed_password=result["hashed_password"], is_superuser=result["is_superuser"], @@ -90,12 +90,12 @@ async def get_user_by_id(self, user_id: UUID) -> UserResponse: collection_ids=result["collection_ids"], ) - async def get_user_by_email(self, email: str) -> UserResponse: + async def get_user_by_email(self, email: str) -> User: query, params = ( QueryBuilder(self._get_table_name("users")) .select( [ - "user_id", + "id", "email", "hashed_password", "is_superuser", @@ -116,8 +116,8 @@ async def get_user_by_email(self, email: str) -> UserResponse: if not result: raise R2RException(status_code=404, message="User not found") - return UserResponse( - id=result["user_id"], + return User( + id=result["id"], email=result["email"], hashed_password=result["hashed_password"], is_superuser=result["is_superuser"], @@ -131,7 +131,9 @@ async def get_user_by_email(self, email: str) -> UserResponse: collection_ids=result["collection_ids"], ) - async def create_user(self, email: str, password: str) -> UserResponse: + async def create_user( + self, email: str, password: str, is_superuser: bool = False + ) -> User: try: if await self.get_user_by_email(email): raise R2RException( @@ -145,12 +147,19 @@ async def create_user(self, email: str, password: str) -> UserResponse: hashed_password = self.crypto_provider.get_password_hash(password) # type: ignore query = f""" INSERT INTO {self._get_table_name(PostgresUserHandler.TABLE_NAME)} - (email, user_id, hashed_password, collection_ids) - VALUES ($1, $2, $3, $4) - RETURNING 
user_id, email, is_superuser, is_active, is_verified, created_at, updated_at, collection_ids + (email, id, is_superuser, hashed_password, collection_ids) + VALUES ($1, $2, $3, $4, $5) + RETURNING id, email, is_superuser, is_active, is_verified, created_at, updated_at, collection_ids """ result = await self.connection_manager.fetchrow_query( - query, [email, generate_user_id(email), hashed_password, []] + query, + [ + email, + generate_user_id(email), + is_superuser, + hashed_password, + [], + ], ) if not result: @@ -159,8 +168,8 @@ async def create_user(self, email: str, password: str) -> UserResponse: detail="Failed to create user", ) - return UserResponse( - id=result["user_id"], + return User( + id=result["id"], email=result["email"], is_superuser=result["is_superuser"], is_active=result["is_active"], @@ -171,13 +180,13 @@ async def create_user(self, email: str, password: str) -> UserResponse: hashed_password=hashed_password, ) - async def update_user(self, user: UserResponse) -> UserResponse: + async def update_user(self, user: User) -> User: query = f""" UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET email = $1, is_superuser = $2, is_active = $3, is_verified = $4, updated_at = NOW(), name = $5, profile_picture = $6, bio = $7, collection_ids = $8 - WHERE user_id = $9 - RETURNING user_id, email, is_superuser, is_active, is_verified, created_at, updated_at, name, profile_picture, bio, collection_ids + WHERE id = $9 + RETURNING id, email, is_superuser, is_active, is_verified, created_at, updated_at, name, profile_picture, bio, collection_ids """ result = await self.connection_manager.fetchrow_query( query, @@ -200,8 +209,8 @@ async def update_user(self, user: UserResponse) -> UserResponse: detail="Failed to update user", ) - return UserResponse( - id=result["user_id"], + return User( + id=result["id"], email=result["email"], is_superuser=result["is_superuser"], is_active=result["is_active"], @@ -214,14 +223,14 @@ async def update_user(self, user: UserResponse) -> UserResponse: collection_ids=result["collection_ids"], ) - async def delete_user_relational(self, user_id: UUID) -> None: + async def delete_user_relational(self, id: UUID) -> None: # Get the collections the user belongs to collection_query = f""" SELECT collection_ids FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} - WHERE user_id = $1 + WHERE id = $1 """ collection_result = await self.connection_manager.fetchrow_query( - collection_query, [user_id] + collection_query, [id] ) if not collection_result: @@ -229,49 +238,45 @@ async def delete_user_relational(self, user_id: UUID) -> None: # Remove user from documents doc_update_query = f""" - UPDATE {self._get_table_name('document_info')} - SET user_id = NULL - WHERE user_id = $1 + UPDATE {self._get_table_name('documents')} + SET id = NULL + WHERE id = $1 """ - await self.connection_manager.execute_query( - doc_update_query, [user_id] - ) + await self.connection_manager.execute_query(doc_update_query, [id]) # Delete the user delete_query = f""" DELETE FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} - WHERE user_id = $1 - RETURNING user_id + WHERE id = $1 + RETURNING id """ result = await self.connection_manager.fetchrow_query( - delete_query, [user_id] + delete_query, [id] ) if not result: raise R2RException(status_code=404, message="User not found") - async def update_user_password( - self, user_id: UUID, new_hashed_password: str - ): + async def update_user_password(self, id: UUID, new_hashed_password: str): query = f""" UPDATE 
{self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET hashed_password = $1, updated_at = NOW() - WHERE user_id = $2 + WHERE id = $2 """ await self.connection_manager.execute_query( - query, [new_hashed_password, user_id] + query, [new_hashed_password, id] ) - async def get_all_users(self) -> list[UserResponse]: + async def get_all_users(self) -> list[User]: query = f""" - SELECT user_id, email, is_superuser, is_active, is_verified, created_at, updated_at, collection_ids + SELECT id, email, is_superuser, is_active, is_verified, created_at, updated_at, collection_ids FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} """ results = await self.connection_manager.fetch_query(query) return [ - UserResponse( - id=result["user_id"], + User( + id=result["id"], email=result["email"], hashed_password="null", is_superuser=result["is_superuser"], @@ -285,15 +290,15 @@ async def get_all_users(self) -> list[UserResponse]: ] async def store_verification_code( - self, user_id: UUID, verification_code: str, expiry: datetime + self, id: UUID, verification_code: str, expiry: datetime ): query = f""" UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET verification_code = $1, verification_code_expiry = $2 - WHERE user_id = $3 + WHERE id = $3 """ await self.connection_manager.execute_query( - query, [verification_code, expiry, user_id] + query, [verification_code, expiry, id] ) async def verify_user(self, verification_code: str) -> None: @@ -301,7 +306,7 @@ async def verify_user(self, verification_code: str) -> None: UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL WHERE verification_code = $1 AND verification_code_expiry > NOW() - RETURNING user_id + RETURNING id """ result = await self.connection_manager.fetchrow_query( query, [verification_code] @@ -320,100 +325,100 @@ async def remove_verification_code(self, verification_code: str): """ await self.connection_manager.execute_query(query, [verification_code]) - async def expire_verification_code(self, user_id: UUID): + async def expire_verification_code(self, id: UUID): query = f""" UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET verification_code_expiry = NOW() - INTERVAL '1 day' - WHERE user_id = $1 + WHERE id = $1 """ - await self.connection_manager.execute_query(query, [user_id]) + await self.connection_manager.execute_query(query, [id]) async def store_reset_token( - self, user_id: UUID, reset_token: str, expiry: datetime + self, id: UUID, reset_token: str, expiry: datetime ): query = f""" UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET reset_token = $1, reset_token_expiry = $2 - WHERE user_id = $3 + WHERE id = $3 """ await self.connection_manager.execute_query( - query, [reset_token, expiry, user_id] + query, [reset_token, expiry, id] ) async def get_user_id_by_reset_token( self, reset_token: str ) -> Optional[UUID]: query = f""" - SELECT user_id FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} + SELECT id FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} WHERE reset_token = $1 AND reset_token_expiry > NOW() """ result = await self.connection_manager.fetchrow_query( query, [reset_token] ) - return result["user_id"] if result else None + return result["id"] if result else None - async def remove_reset_token(self, user_id: UUID): + async def remove_reset_token(self, id: UUID): query = f""" UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET reset_token = NULL, 
reset_token_expiry = NULL - WHERE user_id = $1 + WHERE id = $1 """ - await self.connection_manager.execute_query(query, [user_id]) + await self.connection_manager.execute_query(query, [id]) - async def remove_user_from_all_collections(self, user_id: UUID): + async def remove_user_from_all_collections(self, id: UUID): query = f""" UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET collection_ids = ARRAY[]::UUID[] - WHERE user_id = $1 + WHERE id = $1 """ - await self.connection_manager.execute_query(query, [user_id]) + await self.connection_manager.execute_query(query, [id]) async def add_user_to_collection( - self, user_id: UUID, collection_id: UUID - ) -> None: - if not await self.get_user_by_id(user_id): + self, id: UUID, collection_id: UUID + ) -> bool: + if not await self.get_user_by_id(id): raise R2RException(status_code=404, message="User not found") query = f""" UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET collection_ids = array_append(collection_ids, $1) - WHERE user_id = $2 AND NOT ($1 = ANY(collection_ids)) - RETURNING user_id + WHERE id = $2 AND NOT ($1 = ANY(collection_ids)) + RETURNING id """ result = await self.connection_manager.fetchrow_query( - query, [collection_id, user_id] + query, [collection_id, id] ) # fetchrow instead of execute_query if not result: raise R2RException( status_code=400, message="User already in collection" ) - return None + return True async def remove_user_from_collection( - self, user_id: UUID, collection_id: UUID - ) -> None: - if not await self.get_user_by_id(user_id): + self, id: UUID, collection_id: UUID + ) -> bool: + if not await self.get_user_by_id(id): raise R2RException(status_code=404, message="User not found") query = f""" UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET collection_ids = array_remove(collection_ids, $1) - WHERE user_id = $2 AND $1 = ANY(collection_ids) - RETURNING user_id + WHERE id = $2 AND $1 = ANY(collection_ids) + RETURNING id """ result = await self.connection_manager.fetchrow_query( - query, [collection_id, user_id] + query, [collection_id, id] ) if not result: raise R2RException( status_code=400, message="User is not a member of the specified collection", ) - return None + return True async def get_users_in_collection( - self, collection_id: UUID, offset: int = 0, limit: int = -1 - ) -> dict[str, Union[list[UserResponse], int]]: + self, collection_id: UUID, offset: int, limit: int + ) -> dict[str, list[User] | int]: """ Get all users in a specific collection with pagination. @@ -423,7 +428,7 @@ async def get_users_in_collection( limit (int): The maximum number of users to return. Returns: - List[UserResponse]: A list of UserResponse objects representing the users in the collection. + List[User]: A list of User objects representing the users in the collection. Raises: R2RException: If the collection doesn't exist. 
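A minimal calling sketch (assumed, not from this diff) for the reworked user handler: `get_users_in_collection` now takes explicit `offset`/`limit` and returns `User` objects keyed by `id` alongside a `total_entries` count. The `db` variable stands in for a `PostgresDBProvider` instance:

```python
# Hypothetical pagination loop over a collection's members.
async def list_collection_members(db, collection_id):
    members, offset, limit = [], 0, 100
    while True:
        page = await db.user_handler.get_users_in_collection(
            collection_id=collection_id, offset=offset, limit=limit
        )
        members.extend(page["results"])  # User models, with `id` instead of `user_id`
        offset += limit
        if offset >= page["total_entries"]:
            return members
```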
@@ -432,7 +437,7 @@ async def get_users_in_collection( raise R2RException(status_code=404, message="Collection not found") query = f""" - SELECT u.user_id, u.email, u.is_active, u.is_superuser, u.created_at, u.updated_at, + SELECT u.id, u.email, u.is_active, u.is_superuser, u.created_at, u.updated_at, u.is_verified, u.collection_ids, u.name, u.bio, u.profile_picture, COUNT(*) OVER() AS total_entries FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} u @@ -449,8 +454,8 @@ async def get_users_in_collection( results = await self.connection_manager.fetch_query(query, conditions) users = [ - UserResponse( - id=row["user_id"], + User( + id=row["id"], email=row["email"], is_active=row["is_active"], is_superuser=row["is_superuser"], @@ -471,19 +476,19 @@ async def get_users_in_collection( return {"results": users, "total_entries": total_entries} - async def mark_user_as_superuser(self, user_id: UUID): + async def mark_user_as_superuser(self, id: UUID): query = f""" UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET is_superuser = TRUE, is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL - WHERE user_id = $1 + WHERE id = $1 """ - await self.connection_manager.execute_query(query, [user_id]) + await self.connection_manager.execute_query(query, [id]) async def get_user_id_by_verification_code( self, verification_code: str ) -> Optional[UUID]: query = f""" - SELECT user_id FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} + SELECT id FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} WHERE verification_code = $1 AND verification_code_expiry > NOW() """ result = await self.connection_manager.fetchrow_query( @@ -495,26 +500,35 @@ async def get_user_id_by_verification_code( status_code=400, message="Invalid or expired verification code" ) - return result["user_id"] + return result["id"] - async def mark_user_as_verified(self, user_id: UUID): + async def mark_user_as_verified(self, id: UUID): query = f""" UPDATE {self._get_table_name(PostgresUserHandler.TABLE_NAME)} SET is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL - WHERE user_id = $1 + WHERE id = $1 """ - await self.connection_manager.execute_query(query, [user_id]) + await self.connection_manager.execute_query(query, [id]) async def get_users_overview( self, + offset: int, + limit: int, user_ids: Optional[list[UUID]] = None, - offset: int = 0, - limit: int = -1, - ) -> dict[str, Union[list[UserStats], int]]: + ) -> dict[str, list[User] | int]: + query = f""" - WITH user_docs AS ( + WITH user_document_ids AS ( SELECT - u.user_id, + u.id as user_id, + ARRAY_AGG(d.id) FILTER (WHERE d.id IS NOT NULL) AS doc_ids + FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} u + LEFT JOIN {self._get_table_name('documents')} d ON u.id = d.owner_id + GROUP BY u.id + ), + user_docs AS ( + SELECT + u.id, u.email, u.is_superuser, u.is_active, @@ -522,16 +536,19 @@ async def get_users_overview( u.created_at, u.updated_at, u.collection_ids, - COUNT(d.document_id) AS num_files, + COUNT(d.id) AS num_files, COALESCE(SUM(d.size_in_bytes), 0) AS total_size_in_bytes, - ARRAY_AGG(d.document_id) FILTER (WHERE d.document_id IS NOT NULL) AS document_ids, - COUNT(*) OVER() AS total_entries + ud.doc_ids as document_ids FROM {self._get_table_name(PostgresUserHandler.TABLE_NAME)} u - LEFT JOIN {self._get_table_name('document_info')} d ON u.user_id = d.user_id - {' WHERE u.user_id = ANY($3::uuid[])' if user_ids else ''} - GROUP BY u.user_id, u.email, u.is_superuser, u.is_active, 
u.is_verified, u.created_at, u.updated_at, u.collection_ids + LEFT JOIN {self._get_table_name('documents')} d ON u.id = d.owner_id + LEFT JOIN user_document_ids ud ON u.id = ud.user_id + {' WHERE u.id = ANY($3::uuid[])' if user_ids else ''} + GROUP BY u.id, u.email, u.is_superuser, u.is_active, u.is_verified, + u.created_at, u.updated_at, u.collection_ids, ud.doc_ids ) - SELECT * + SELECT + user_docs.*, + COUNT(*) OVER() AS total_entries FROM user_docs ORDER BY email OFFSET $1 @@ -549,22 +566,29 @@ async def get_users_overview( results = await self.connection_manager.fetch_query(query, params) users = [ - UserStats( - user_id=row[0], - email=row[1], - is_superuser=row[2], - is_active=row[3], - is_verified=row[4], - created_at=row[5], - updated_at=row[6], - collection_ids=row[7] or [], - num_files=row[8], - total_size_in_bytes=row[9], - document_ids=row[10] or [], + User( + id=row["id"], + email=row["email"], + is_superuser=row["is_superuser"], + is_active=row["is_active"], + is_verified=row["is_verified"], + created_at=row["created_at"], + updated_at=row["updated_at"], + collection_ids=row["collection_ids"] or [], + num_files=row["num_files"], + total_size_in_bytes=row["total_size_in_bytes"], + document_ids=( + [] + if row["document_ids"] is None + else [doc_id for doc_id in row["document_ids"]] + ), ) for row in results ] + if not users: + raise R2RException(status_code=404, message="No users found") + total_entries = results[0]["total_entries"] return {"results": users, "total_entries": total_entries} @@ -573,7 +597,7 @@ async def _collection_exists(self, collection_id: UUID) -> bool: """Check if a collection exists.""" query = f""" SELECT 1 FROM {self._get_table_name(PostgresCollectionHandler.TABLE_NAME)} - WHERE collection_id = $1 + WHERE id = $1 """ result = await self.connection_manager.fetchrow_query( query, [collection_id] @@ -581,7 +605,8 @@ async def _collection_exists(self, collection_id: UUID) -> bool: return result is not None async def get_user_validation_data( - self, user_id: UUID, *args, **kwargs + self, + user_id: UUID, ) -> dict: """ Get verification data for a specific user. @@ -594,7 +619,7 @@ async def get_user_validation_data( reset_token, reset_token_expiry FROM {self._get_table_name("users")} - WHERE user_id = $1 + WHERE id = $1 """ result = await self.connection_manager.fetchrow_query(query, [user_id]) diff --git a/py/core/providers/database/vecs/adapter/base.py b/py/core/providers/database/vecs/adapter/base.py index 47870d10c..bda82f736 100644 --- a/py/core/providers/database/vecs/adapter/base.py +++ b/py/core/providers/database/vecs/adapter/base.py @@ -8,13 +8,13 @@ from abc import ABC, abstractmethod from enum import Enum -from typing import Any, Dict, Generator, Iterable, Optional, Tuple, Union +from typing import Any, Generator, Iterable, Optional, Tuple, Union from uuid import UUID from vecs.exc import ArgError MetadataValues = Union[str, int, float, bool, list[str]] -Metadata = Dict[str, MetadataValues] +Metadata = dict[str, MetadataValues] Numeric = Union[int, float, complex] Record = Tuple[ @@ -65,9 +65,9 @@ def exported_dimension(self) -> Optional[int]: @abstractmethod def __call__( self, - records: Iterable[Tuple[str, Any, Optional[Dict]]], + records: Iterable[Tuple[str, Any, Optional[dict]]], adapter_context: AdapterContext, - ) -> Generator[Tuple[str, Any, Dict], None, None]: + ) -> Generator[Tuple[str, Any, dict], None, None]: """ Abstract method that should be overridden by subclasses to handle each record. 
""" @@ -106,9 +106,9 @@ def exported_dimension(self) -> Optional[int]: def __call__( self, - records: Iterable[Tuple[str, Any, Optional[Dict]]], + records: Iterable[Tuple[str, Any, Optional[dict]]], adapter_context: AdapterContext, - ) -> Generator[Tuple[str, Any, Dict], None, None]: + ) -> Generator[Tuple[str, Any, dict], None, None]: """ Invokes the adapter pipeline on an iterable of records. diff --git a/py/core/providers/database/vecs/adapter/markdown.py b/py/core/providers/database/vecs/adapter/markdown.py index 9a70e39bc..e239bbf75 100644 --- a/py/core/providers/database/vecs/adapter/markdown.py +++ b/py/core/providers/database/vecs/adapter/markdown.py @@ -1,5 +1,5 @@ import re -from typing import Any, Dict, Generator, Iterable, Optional, Tuple +from typing import Any, Generator, Iterable, Optional, Tuple from flupy import flu @@ -54,22 +54,22 @@ def split_by_heading( def __call__( self, - records: Iterable[Tuple[str, Any, Optional[Dict]]], + records: Iterable[Tuple[str, Any, Optional[dict]]], adapter_context: AdapterContext, max_tokens: int = 99999999, - ) -> Generator[Tuple[str, Any, Dict], None, None]: + ) -> Generator[Tuple[str, Any, dict], None, None]: """ Splits each markdown string in the records into chunks where each heading starts a new chunk, and yields each chunk as a separate record. If the `skip_during_query` attribute is set to True, this step is skipped during querying. Args: - records (Iterable[Tuple[str, Any, Optional[Dict]]]): Iterable of tuples each containing an id, a markdown string and an optional dict. + records (Iterable[Tuple[str, Any, Optional[dict]]]): Iterable of tuples each containing an id, a markdown string and an optional dict. adapter_context (AdapterContext): Context of the adapter. max_tokens (int): The maximum number of tokens per chunk Yields: - Tuple[str, Any, Dict]: The id appended with chunk index, the chunk, and the metadata. + Tuple[str, Any, dict]: The id appended with chunk index, the chunk, and the metadata. """ if max_tokens and max_tokens < 1: raise ValueError("max_tokens must be a nonzero positive integer") diff --git a/py/core/providers/database/vecs/adapter/text.py b/py/core/providers/database/vecs/adapter/text.py index 78ae7732b..1fec8e208 100644 --- a/py/core/providers/database/vecs/adapter/text.py +++ b/py/core/providers/database/vecs/adapter/text.py @@ -5,7 +5,7 @@ All public classes, enums, and functions are re-exported by `vecs.adapters` module. """ -from typing import Any, Dict, Generator, Iterable, Literal, Optional, Tuple +from typing import Any, Generator, Iterable, Literal, Optional, Tuple from flupy import flu from vecs.exc import MissingDependency @@ -78,9 +78,9 @@ def exported_dimension(self) -> Optional[int]: def __call__( self, - records: Iterable[Tuple[str, Any, Optional[Dict]]], + records: Iterable[Tuple[str, Any, Optional[dict]]], adapter_context: AdapterContext, # pyright: ignore - ) -> Generator[Tuple[str, Any, Dict], None, None]: + ) -> Generator[Tuple[str, Any, dict], None, None]: """ Converts each media in the records to an embedding and yields the result. @@ -89,7 +89,7 @@ def __call__( adapter_context: Context of the adapter. Yields: - Tuple[str, Any, Dict]: The id, the embedding, and the metadata. + Tuple[str, Any, dict]: The id, the embedding, and the metadata. 
""" for batch in flu(records).chunk(self.batch_size): batch_records = [x for x in batch] @@ -118,20 +118,20 @@ def __init__(self, *, skip_during_query: bool): def __call__( self, - records: Iterable[Tuple[str, Any, Optional[Dict]]], + records: Iterable[Tuple[str, Any, Optional[dict]]], adapter_context: AdapterContext, - ) -> Generator[Tuple[str, Any, Dict], None, None]: + ) -> Generator[Tuple[str, Any, dict], None, None]: """ Splits each media in the records into paragraphs and yields each paragraph as a separate record. If the `skip_during_query` attribute is set to True, this step is skipped during querying. Args: - records (Iterable[Tuple[str, Any, Optional[Dict]]]): Iterable of tuples each containing an id, a media and an optional dict. + records (Iterable[Tuple[str, Any, Optional[dict]]]): Iterable of tuples each containing an id, a media and an optional dict. adapter_context (AdapterContext): Context of the adapter. Yields: - Tuple[str, Any, Dict]: The id appended with paragraph index, the paragraph, and the metadata. + Tuple[str, Any, dict]: The id appended with paragraph index, the paragraph, and the metadata. """ if ( adapter_context == AdapterContext("query") diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 4ee807922..ae4412a90 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -3,21 +3,22 @@ import logging import time import uuid -from typing import Any, Optional, Tuple, TypedDict, Union +from typing import Any, Optional, TypedDict from uuid import UUID import numpy as np from core.base import ( + ChunkHandler, + ChunkSearchResult, IndexArgsHNSW, IndexArgsIVFFlat, IndexMeasure, IndexMethod, + R2RException, SearchSettings, VectorEntry, - VectorHandler, VectorQuantizationType, - VectorSearchResult, VectorTableName, ) @@ -45,14 +46,15 @@ def index_measure_to_ops( def quantize_vector_to_binary( - vector: Union[list[float], np.ndarray], threshold: float = 0.0 + vector: list[float] | np.ndarray, + threshold: float = 0.0, ) -> bytes: """ Quantizes a float vector to a binary vector string for PostgreSQL bit type. Used when quantization_type is INT1. Args: - vector (Union[List[float], np.ndarray]): Input vector of floats + vector (List[float] | np.ndarray): Input vector of floats threshold (float, optional): Threshold for binarization. Defaults to 0.0. 
Returns: @@ -74,17 +76,17 @@ def quantize_vector_to_binary( class HybridSearchIntermediateResult(TypedDict): semantic_rank: int full_text_rank: int - data: VectorSearchResult + data: ChunkSearchResult rrf_score: float -class PostgresVectorHandler(VectorHandler): - TABLE_NAME = VectorTableName.VECTORS +class PostgresChunkHandler(ChunkHandler): + TABLE_NAME = VectorTableName.CHUNKS COLUMN_VARS = [ - "extraction_id", + "id", "document_id", - "user_id", + "owner_id", "collection_ids", ] @@ -103,7 +105,7 @@ def __init__( async def create_tables(self): # Check for old table name first - check_query = f""" + check_query = """ SELECT EXISTS ( SELECT FROM pg_tables WHERE schemaname = $1 @@ -129,10 +131,10 @@ async def create_tables(self): ) query = f""" - CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} ( - extraction_id UUID PRIMARY KEY, + CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} ( + id UUID PRIMARY KEY, document_id UUID, - user_id UUID, + owner_id UUID, collection_ids UUID[], vec vector({self.dimension}), {binary_col} @@ -140,13 +142,13 @@ async def create_tables(self): metadata JSONB {",fts tsvector GENERATED ALWAYS AS (to_tsvector('english', text)) STORED" if self.enable_fts else ""} ); - CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} (document_id); - CREATE INDEX IF NOT EXISTS idx_vectors_user_id ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} (user_id); - CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} USING GIN (collection_ids); + CREATE INDEX IF NOT EXISTS idx_vectors_document_id ON {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} (document_id); + CREATE INDEX IF NOT EXISTS idx_vectors_owner_id ON {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} (owner_id); + CREATE INDEX IF NOT EXISTS idx_vectors_collection_ids ON {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} USING GIN (collection_ids); """ if self.enable_fts: query += f""" - CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} USING GIN (to_tsvector('english', text)); + CREATE INDEX IF NOT EXISTS idx_vectors_text ON {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} USING GIN (to_tsvector('english', text)); """ await self.connection_manager.execute_query(query) @@ -160,12 +162,12 @@ async def upsert(self, entry: VectorEntry) -> None: if self.quantization_type == VectorQuantizationType.INT1: # For quantized vectors, use vec_binary column query = f""" - INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} - (extraction_id, document_id, user_id, collection_ids, vec, vec_binary, text, metadata) + INSERT INTO {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} + (id, document_id, owner_id, collection_ids, vec, vec_binary, text, metadata) VALUES ($1, $2, $3, $4, $5, $6::bit({self.dimension}), $7, $8) - ON CONFLICT (extraction_id) DO UPDATE SET + ON CONFLICT (id) DO UPDATE SET document_id = EXCLUDED.document_id, - user_id = EXCLUDED.user_id, + owner_id = EXCLUDED.owner_id, collection_ids = EXCLUDED.collection_ids, vec = EXCLUDED.vec, vec_binary = EXCLUDED.vec_binary, @@ -175,9 +177,9 @@ async def upsert(self, entry: VectorEntry) -> None: await self.connection_manager.execute_query( query, ( - entry.extraction_id, + entry.id, entry.document_id, - entry.user_id, + entry.owner_id, entry.collection_ids, str(entry.vector.data), 
quantize_vector_to_binary( @@ -190,12 +192,12 @@ async def upsert(self, entry: VectorEntry) -> None: else: # For regular vectors, use vec column only query = f""" - INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} - (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) + INSERT INTO {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} + (id, document_id, owner_id, collection_ids, vec, text, metadata) VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (extraction_id) DO UPDATE SET + ON CONFLICT (id) DO UPDATE SET document_id = EXCLUDED.document_id, - user_id = EXCLUDED.user_id, + owner_id = EXCLUDED.owner_id, collection_ids = EXCLUDED.collection_ids, vec = EXCLUDED.vec, text = EXCLUDED.text, @@ -205,9 +207,9 @@ async def upsert(self, entry: VectorEntry) -> None: await self.connection_manager.execute_query( query, ( - entry.extraction_id, + entry.id, entry.document_id, - entry.user_id, + entry.owner_id, entry.collection_ids, str(entry.vector.data), entry.text, @@ -223,12 +225,12 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: if self.quantization_type == VectorQuantizationType.INT1: # For quantized vectors, use vec_binary column query = f""" - INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} - (extraction_id, document_id, user_id, collection_ids, vec, vec_binary, text, metadata) + INSERT INTO {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} + (id, document_id, owner_id, collection_ids, vec, vec_binary, text, metadata) VALUES ($1, $2, $3, $4, $5, $6::bit({self.dimension}), $7, $8) - ON CONFLICT (extraction_id) DO UPDATE SET + ON CONFLICT (id) DO UPDATE SET document_id = EXCLUDED.document_id, - user_id = EXCLUDED.user_id, + owner_id = EXCLUDED.owner_id, collection_ids = EXCLUDED.collection_ids, vec = EXCLUDED.vec, vec_binary = EXCLUDED.vec_binary, @@ -237,9 +239,9 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: """ bin_params = [ ( - entry.extraction_id, + entry.id, entry.document_id, - entry.user_id, + entry.owner_id, entry.collection_ids, str(entry.vector.data), quantize_vector_to_binary( @@ -255,12 +257,12 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: else: # For regular vectors, use vec column only query = f""" - INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} - (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) + INSERT INTO {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} + (id, document_id, owner_id, collection_ids, vec, text, metadata) VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (extraction_id) DO UPDATE SET + ON CONFLICT (id) DO UPDATE SET document_id = EXCLUDED.document_id, - user_id = EXCLUDED.user_id, + owner_id = EXCLUDED.owner_id, collection_ids = EXCLUDED.collection_ids, vec = EXCLUDED.vec, text = EXCLUDED.text, @@ -268,9 +270,9 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: """ params = [ ( - entry.extraction_id, + entry.id, entry.document_id, - entry.user_id, + entry.owner_id, entry.collection_ids, str(entry.vector.data), entry.text, @@ -283,29 +285,31 @@ async def upsert_entries(self, entries: list[VectorEntry]) -> None: async def semantic_search( self, query_vector: list[float], search_settings: SearchSettings - ) -> list[VectorSearchResult]: + ) -> list[ChunkSearchResult]: try: - imeasure_obj = IndexMeasure(search_settings.index_measure) + imeasure_obj = IndexMeasure( + search_settings.chunk_settings.index_measure + ) except ValueError: raise 
ValueError("Invalid index measure") - table_name = self._get_table_name(PostgresVectorHandler.TABLE_NAME) + table_name = self._get_table_name(PostgresChunkHandler.TABLE_NAME) cols = [ - f"{table_name}.extraction_id", + f"{table_name}.id", f"{table_name}.document_id", - f"{table_name}.user_id", + f"{table_name}.owner_id", f"{table_name}.collection_ids", f"{table_name}.text", ] - params: list[Union[str, int, bytes]] = [] + params: list[str | int | bytes] = [] # For binary vectors (INT1), implement two-stage search if self.quantization_type == VectorQuantizationType.INT1: # Convert query vector to binary format binary_query = quantize_vector_to_binary(query_vector) # TODO - Put depth multiplier in config / settings extended_limit = ( - search_settings.search_limit * 20 + search_settings.limit * 20 ) # Get 20x candidates for re-ranking if ( imeasure_obj == IndexMeasure.hamming_distance @@ -350,9 +354,9 @@ async def semantic_search( ) -- Second stage: Re-rank using original vectors SELECT - extraction_id, + id, document_id, - user_id, + owner_id, collection_ids, text, {"metadata," if search_settings.include_metadatas else ""} @@ -366,17 +370,17 @@ async def semantic_search( [ extended_limit, # First stage limit search_settings.offset, - search_settings.search_limit, # Final limit + search_settings.limit, # Final limit str(query_vector), # For re-ranking ] ) else: # Standard float vector handling - unchanged from original - distance_calc = f"{table_name}.vec {search_settings.index_measure.pgvector_repr} $1::vector({self.dimension})" + distance_calc = f"{table_name}.vec {search_settings.chunk_settings.index_measure.pgvector_repr} $1::vector({self.dimension})" query_param = str(query_vector) - if search_settings.include_values: + if search_settings.include_scores: cols.append(f"({distance_calc}) AS distance") if search_settings.include_metadatas: cols.append(f"{table_name}.metadata") @@ -399,17 +403,15 @@ async def semantic_search( LIMIT ${len(params) + 1} OFFSET ${len(params) + 2} """ - params.extend( - [search_settings.search_limit, search_settings.offset] - ) + params.extend([search_settings.limit, search_settings.offset]) results = await self.connection_manager.fetch_query(query, params) return [ - VectorSearchResult( - extraction_id=UUID(str(result["extraction_id"])), + ChunkSearchResult( + id=UUID(str(result["id"])), document_id=UUID(str(result["document_id"])), - user_id=UUID(str(result["user_id"])), + owner_id=UUID(str(result["owner_id"])), collection_ids=result["collection_ids"], text=result["text"], score=( @@ -428,14 +430,14 @@ async def semantic_search( async def full_text_search( self, query_text: str, search_settings: SearchSettings - ) -> list[VectorSearchResult]: + ) -> list[ChunkSearchResult]: if not self.enable_fts: raise ValueError( "Full-text search is not enabled for this collection." 
) where_clauses = [] - params: list[Union[str, int, bytes]] = [query_text] + params: list[str | int | bytes] = [query_text] if search_settings.filters: filters_clause = self._build_filters( @@ -454,9 +456,9 @@ async def full_text_search( query = f""" SELECT - extraction_id, document_id, user_id, collection_ids, text, metadata, + id, document_id, owner_id, collection_ids, text, metadata, ts_rank(fts, websearch_to_tsquery('english', $1), 32) as rank - FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + FROM {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} {where_clause} """ @@ -467,16 +469,16 @@ async def full_text_search( params.extend( [ search_settings.offset, - search_settings.hybrid_search_settings.full_text_limit, + search_settings.hybrid_settings.full_text_limit, ] ) results = await self.connection_manager.fetch_query(query, params) return [ - VectorSearchResult( - extraction_id=UUID(str(r["extraction_id"])), + ChunkSearchResult( + id=UUID(str(r["id"])), document_id=UUID(str(r["document_id"])), - user_id=UUID(str(r["user_id"])), + owner_id=UUID(str(r["owner_id"])), collection_ids=r["collection_ids"], text=r["text"], score=float(r["rank"]), @@ -492,50 +494,44 @@ async def hybrid_search( search_settings: SearchSettings, *args, **kwargs, - ) -> list[VectorSearchResult]: - if search_settings.hybrid_search_settings is None: + ) -> list[ChunkSearchResult]: + if search_settings.hybrid_settings is None: raise ValueError( - "Please provide a valid `hybrid_search_settings` in the `search_settings`." + "Please provide a valid `hybrid_settings` in the `search_settings`." ) if ( - search_settings.hybrid_search_settings.full_text_limit - < search_settings.search_limit + search_settings.hybrid_settings.full_text_limit + < search_settings.limit ): raise ValueError( - "The `full_text_limit` must be greater than or equal to the `search_limit`." + "The `full_text_limit` must be greater than or equal to the `limit`." 
) semantic_settings = copy.deepcopy(search_settings) - semantic_settings.search_limit += search_settings.offset + semantic_settings.limit += search_settings.offset full_text_settings = copy.deepcopy(search_settings) - full_text_settings.hybrid_search_settings.full_text_limit += ( + full_text_settings.hybrid_settings.full_text_limit += ( search_settings.offset ) - semantic_results: list[VectorSearchResult] = ( - await self.semantic_search(query_vector, semantic_settings) + semantic_results: list[ChunkSearchResult] = await self.semantic_search( + query_vector, semantic_settings ) - full_text_results: list[VectorSearchResult] = ( + full_text_results: list[ChunkSearchResult] = ( await self.full_text_search(query_text, full_text_settings) ) - semantic_limit = search_settings.search_limit - full_text_limit = ( - search_settings.hybrid_search_settings.full_text_limit - ) - semantic_weight = ( - search_settings.hybrid_search_settings.semantic_weight - ) - full_text_weight = ( - search_settings.hybrid_search_settings.full_text_weight - ) - rrf_k = search_settings.hybrid_search_settings.rrf_k + semantic_limit = search_settings.limit + full_text_limit = search_settings.hybrid_settings.full_text_limit + semantic_weight = search_settings.hybrid_settings.semantic_weight + full_text_weight = search_settings.hybrid_settings.full_text_weight + rrf_k = search_settings.hybrid_settings.rrf_k combined_results: dict[uuid.UUID, HybridSearchIntermediateResult] = {} for rank, result in enumerate(semantic_results, 1): - combined_results[result.extraction_id] = { + combined_results[result.id] = { "semantic_rank": rank, "full_text_rank": full_text_limit, "data": result, @@ -543,10 +539,10 @@ async def hybrid_search( } for rank, result in enumerate(full_text_results, 1): - if result.extraction_id in combined_results: - combined_results[result.extraction_id]["full_text_rank"] = rank + if result.id in combined_results: + combined_results[result.id]["full_text_rank"] = rank else: - combined_results[result.extraction_id] = { + combined_results[result.id] = { "semantic_rank": semantic_limit, "full_text_rank": rank, "data": result, @@ -575,14 +571,14 @@ async def hybrid_search( ) offset_results = sorted_results[ search_settings.offset : search_settings.offset - + search_settings.search_limit + + search_settings.limit ] return [ - VectorSearchResult( - extraction_id=result["data"].extraction_id, + ChunkSearchResult( + id=result["data"].id, document_id=result["data"].document_id, - user_id=result["data"].user_id, + owner_id=result["data"].owner_id, collection_ids=result["data"].collection_ids, text=result["data"].text, score=result["rrf_score"], @@ -598,21 +594,21 @@ async def hybrid_search( async def delete( self, filters: dict[str, Any] ) -> dict[str, dict[str, str]]: - params: list[Union[str, int, bytes]] = [] + params: list[str | int | bytes] = [] where_clause = self._build_filters(filters, params) query = f""" - DELETE FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + DELETE FROM {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} WHERE {where_clause} - RETURNING extraction_id, document_id, text; + RETURNING id, document_id, text; """ results = await self.connection_manager.fetch_query(query, params) return { - str(result["extraction_id"]): { + str(result["id"]): { "status": "deleted", - "extraction_id": str(result["extraction_id"]), + "id": str(result["id"]), "document_id": str(result["document_id"]), "text": result["text"], } @@ -623,7 +619,7 @@ async def assign_document_to_collection_vector( self, 
document_id: UUID, collection_id: UUID ) -> None: query = f""" - UPDATE {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + UPDATE {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} SET collection_ids = array_append(collection_ids, $1) WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids)); """ @@ -635,7 +631,7 @@ async def remove_document_from_collection_vector( self, document_id: UUID, collection_id: UUID ) -> None: query = f""" - UPDATE {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + UPDATE {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} SET collection_ids = array_remove(collection_ids, $1) WHERE document_id = $2; """ @@ -643,16 +639,16 @@ async def remove_document_from_collection_vector( query, (collection_id, document_id) ) - async def delete_user_vector(self, user_id: UUID) -> None: + async def delete_user_vector(self, owner_id: UUID) -> None: query = f""" - DELETE FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} - WHERE user_id = $1; + DELETE FROM {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} + WHERE owner_id = $1; """ - await self.connection_manager.execute_query(query, (user_id,)) + await self.connection_manager.execute_query(query, (owner_id,)) async def delete_collection_vector(self, collection_id: UUID) -> None: query = f""" - DELETE FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + DELETE FROM {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} WHERE $1 = ANY(collection_ids) RETURNING collection_ids """ @@ -661,19 +657,19 @@ async def delete_collection_vector(self, collection_id: UUID) -> None: ) return None - async def get_document_chunks( + async def list_document_chunks( self, document_id: UUID, - offset: int = 0, - limit: int = -1, + offset: int, + limit: int, include_vectors: bool = False, ) -> dict[str, Any]: vector_select = ", vec" if include_vectors else "" limit_clause = f"LIMIT {limit}" if limit > -1 else "" query = f""" - SELECT extraction_id, document_id, user_id, collection_ids, text, metadata{vector_select}, COUNT(*) OVER() AS total - FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + SELECT id, document_id, owner_id, collection_ids, text, metadata{vector_select}, COUNT(*) OVER() AS total + FROM {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} WHERE document_id = $1 ORDER BY (metadata->>'chunk_order')::integer OFFSET $2 @@ -690,9 +686,9 @@ async def get_document_chunks( total = results[0].get("total", 0) chunks = [ { - "extraction_id": result["extraction_id"], + "id": result["id"], "document_id": result["document_id"], - "user_id": result["user_id"], + "owner_id": result["owner_id"], "collection_ids": result["collection_ids"], "text": result["text"], "metadata": json.loads(result["metadata"]), @@ -705,36 +701,34 @@ async def get_document_chunks( return {"results": chunks, "total_entries": total} - async def get_chunk(self, extraction_id: UUID) -> Optional[dict[str, Any]]: + async def get_chunk(self, id: UUID) -> dict: query = f""" - SELECT extraction_id, document_id, user_id, collection_ids, text, metadata - FROM {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} - WHERE extraction_id = $1; + SELECT id, document_id, owner_id, collection_ids, text, metadata + FROM {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} + WHERE id = $1; """ - result = await self.connection_manager.fetchrow_query( - query, (extraction_id,) - ) + result = await self.connection_manager.fetchrow_query(query, (id,)) if result: return { - "extraction_id": result["extraction_id"], + "id": 
result["id"], "document_id": result["document_id"], - "user_id": result["user_id"], + "owner_id": result["owner_id"], "collection_ids": result["collection_ids"], "text": result["text"], "metadata": json.loads(result["metadata"]), } - return None + raise R2RException( + message=f"Chunk with ID {id} not found", status_code=404 + ) async def create_index( self, table_name: Optional[VectorTableName] = None, index_measure: IndexMeasure = IndexMeasure.cosine_distance, index_method: IndexMethod = IndexMethod.auto, - index_arguments: Optional[ - Union[IndexArgsIVFFlat, IndexArgsHNSW] - ] = None, + index_arguments: Optional[IndexArgsIVFFlat | IndexArgsHNSW] = None, index_name: Optional[str] = None, index_column: Optional[str] = None, concurrently: bool = True, @@ -771,8 +765,8 @@ async def create_index( ArgError: If an invalid index method is used, or if *replace* is False and an index already exists. """ - if table_name == VectorTableName.VECTORS: - table_name_str = f"{self.project_name}.{VectorTableName.VECTORS}" # TODO - Fix bug in vector table naming convention + if table_name == VectorTableName.CHUNKS: + table_name_str = f"{self.project_name}.{VectorTableName.CHUNKS}" # TODO - Fix bug in vector table naming convention if index_column: col_name = index_column else: @@ -789,9 +783,9 @@ async def create_index( f"{self.project_name}.{VectorTableName.ENTITIES_DOCUMENT}" ) col_name = "description_embedding" - elif table_name == VectorTableName.ENTITIES_COLLECTION: + elif table_name == VectorTableName.GRAPHS_ENTITIES: table_name_str = ( - f"{self.project_name}.{VectorTableName.ENTITIES_COLLECTION}" + f"{self.project_name}.{VectorTableName.GRAPHS_ENTITIES}" ) col_name = "description_embedding" elif table_name == VectorTableName.COMMUNITIES: @@ -871,7 +865,7 @@ async def create_index( return None def _build_filters( - self, filters: dict, parameters: list[Union[str, int, bytes]] + self, filters: dict, parameters: list[str | int | bytes] ) -> str: def parse_condition(key: str, value: Any) -> str: # type: ignore @@ -1003,75 +997,107 @@ def parse_filter(filter_dict: dict) -> str: return where_clause async def list_indices( - self, table_name: Optional[VectorTableName] = None - ) -> list[dict[str, Any]]: - """ - Lists all vector indices for the specified table. - - Args: - table_name (VectorTableName, optional): The table to list indices for. - If None, defaults to VECTORS table. 
+ self, + offset: int, + limit: int, + filters: Optional[dict[str, Any]] = None, + ) -> dict: + where_clauses = [] + params: list[Any] = [self.project_name] # Start with schema name + param_count = 1 + + # Handle filtering + if filters: + if "table_name" in filters: + where_clauses.append(f"i.tablename = ${param_count + 1}") + params.append(filters["table_name"]) + param_count += 1 + if "index_method" in filters: + where_clauses.append(f"am.amname = ${param_count + 1}") + params.append(filters["index_method"]) + param_count += 1 + if "index_name" in filters: + where_clauses.append( + f"LOWER(i.indexname) LIKE LOWER(${param_count + 1})" + ) + params.append(f"%{filters['index_name']}%") + param_count += 1 - Returns: - List[dict]: List of indices with their properties + where_clause = " AND ".join(where_clauses) if where_clauses else "" + if where_clause: + where_clause = "AND " + where_clause - Raises: - ArgError: If an invalid table name is provided + query = f""" + WITH index_info AS ( + SELECT + i.indexname as name, + i.tablename as table_name, + i.indexdef as definition, + am.amname as method, + pg_relation_size(c.oid) as size_in_bytes, + c.reltuples::bigint as row_estimate, + COALESCE(psat.idx_scan, 0) as number_of_scans, + COALESCE(psat.idx_tup_read, 0) as tuples_read, + COALESCE(psat.idx_tup_fetch, 0) as tuples_fetched, + COUNT(*) OVER() as total_count + FROM pg_indexes i + JOIN pg_class c ON c.relname = i.indexname + JOIN pg_am am ON c.relam = am.oid + LEFT JOIN pg_stat_user_indexes psat ON psat.indexrelname = i.indexname + AND psat.schemaname = i.schemaname + WHERE i.schemaname = $1 + AND i.indexdef LIKE '%vector%' + {where_clause} + ) + SELECT * + FROM index_info + ORDER BY name + LIMIT ${param_count + 1} + OFFSET ${param_count + 2} """ - if table_name == VectorTableName.VECTORS: - table_name_str = f"{self.project_name}.{VectorTableName.VECTORS}" - col_name = "vec" - elif table_name == VectorTableName.ENTITIES_DOCUMENT: - table_name_str = ( - f"{self.project_name}.{VectorTableName.ENTITIES_DOCUMENT}" - ) - col_name = "description_embedding" - elif table_name == VectorTableName.ENTITIES_COLLECTION: - table_name_str = ( - f"{self.project_name}.{VectorTableName.ENTITIES_COLLECTION}" - ) - elif table_name == VectorTableName.COMMUNITIES: - table_name_str = ( - f"{self.project_name}.{VectorTableName.COMMUNITIES}" - ) - col_name = "embedding" - else: - raise ArgError("invalid table name") - query = """ - SELECT - i.indexname as name, - i.indexdef as definition, - am.amname as method, - pg_relation_size(c.oid) as size_in_bytes, - COALESCE(psat.idx_scan, 0) as number_of_scans, - COALESCE(psat.idx_tup_read, 0) as tuples_read, - COALESCE(psat.idx_tup_fetch, 0) as tuples_fetched - FROM pg_indexes i - JOIN pg_class c ON c.relname = i.indexname - JOIN pg_am am ON c.relam = am.oid - LEFT JOIN pg_stat_user_indexes psat ON psat.indexrelname = i.indexname - AND psat.schemaname = i.schemaname - WHERE i.schemaname || '.' 
|| i.tablename = $1 - AND i.indexdef LIKE $2; - """ + # Add limit and offset to params + params.extend([limit, offset]) - results = await self.connection_manager.fetch_query( - query, (table_name_str, f"%({col_name}%") - ) + results = await self.connection_manager.fetch_query(query, params) - return [ - { - "name": result["name"], - "definition": result["definition"], - "method": result["method"], - "size_in_bytes": result["size_in_bytes"], - "number_of_scans": result["number_of_scans"], - "tuples_read": result["tuples_read"], - "tuples_fetched": result["tuples_fetched"], - } - for result in results - ] + indices = [] + total_entries = 0 + + if results: + total_entries = results[0]["total_count"] + for result in results: + index_info = { + "name": result["name"], + "table_name": result["table_name"], + "definition": result["definition"], + "size_in_bytes": result["size_in_bytes"], + "row_estimate": result["row_estimate"], + "number_of_scans": result["number_of_scans"], + "tuples_read": result["tuples_read"], + "tuples_fetched": result["tuples_fetched"], + } + indices.append(index_info) + + # Calculate pagination info + total_pages = (total_entries + limit - 1) // limit if limit > 0 else 1 + current_page = (offset // limit) + 1 if limit > 0 else 1 + + page_info = { + "total_entries": total_entries, + "total_pages": total_pages, + "current_page": current_page, + "limit": limit, + "offset": offset, + "has_previous": offset > 0, + "has_next": offset + limit < total_entries, + "previous_offset": max(0, offset - limit) if offset > 0 else None, + "next_offset": ( + offset + limit if offset + limit < total_entries else None + ), + } + + return {"indices": indices, "page_info": page_info} async def delete_index( self, @@ -1092,17 +1118,17 @@ async def delete_index( Exception: If index deletion fails """ # Validate table name and get column name - if table_name == VectorTableName.VECTORS: - table_name_str = f"{self.project_name}.{VectorTableName.VECTORS}" + if table_name == VectorTableName.CHUNKS: + table_name_str = f"{self.project_name}.{VectorTableName.CHUNKS}" col_name = "vec" elif table_name == VectorTableName.ENTITIES_DOCUMENT: table_name_str = ( f"{self.project_name}.{VectorTableName.ENTITIES_DOCUMENT}" ) col_name = "description_embedding" - elif table_name == VectorTableName.ENTITIES_COLLECTION: + elif table_name == VectorTableName.GRAPHS_ENTITIES: table_name_str = ( - f"{self.project_name}.{VectorTableName.ENTITIES_COLLECTION}" + f"{self.project_name}.{VectorTableName.GRAPHS_ENTITIES}" ) col_name = "description_embedding" elif table_name == VectorTableName.COMMUNITIES: @@ -1158,34 +1184,35 @@ async def delete_index( async def get_semantic_neighbors( self, + offset: int, + limit: int, document_id: UUID, - chunk_id: UUID, - limit: int = 10, + id: UUID, similarity_threshold: float = 0.5, ) -> list[dict[str, Any]]: - table_name = self._get_table_name(PostgresVectorHandler.TABLE_NAME) + table_name = self._get_table_name(PostgresChunkHandler.TABLE_NAME) query = f""" WITH target_vector AS ( SELECT vec FROM {table_name} - WHERE document_id = $1 AND extraction_id = $2 + WHERE document_id = $1 AND id = $2 ) - SELECT t.extraction_id, t.text, t.metadata, t.document_id, (t.vec <=> tv.vec) AS similarity + SELECT t.id, t.text, t.metadata, t.document_id, (t.vec <=> tv.vec) AS similarity FROM {table_name} t, target_vector tv WHERE (t.vec <=> tv.vec) >= $3 AND t.document_id = $1 - AND t.extraction_id != $2 + AND t.id != $2 ORDER BY similarity ASC LIMIT $4 """ results = await 
self.connection_manager.fetch_query( query, - (str(document_id), str(chunk_id), similarity_threshold, limit), + (str(document_id), str(id), similarity_threshold, limit), ) return [ { - "extraction_id": str(r["extraction_id"]), + "id": str(r["id"]), "text": r["text"], "metadata": json.loads(r["metadata"]), "document_id": str(r["document_id"]), @@ -1194,10 +1221,245 @@ async def get_semantic_neighbors( for r in results ] + async def list_chunks( + self, + offset: int, + limit: int, + filters: Optional[dict[str, Any]] = None, + include_vectors: bool = False, + ) -> dict[str, Any]: + """ + List chunks with pagination support. + + Args: + offset (int, optional): Number of records to skip. Defaults to 0. + limit (int, optional): Maximum number of records to return. Defaults to 10. + filters (dict, optional): Dictionary of filters to apply. Defaults to None. + include_vectors (bool, optional): Whether to include vector data. Defaults to False. + + Returns: + dict: Dictionary containing: + - results: List of chunk records + - total_entries: Total number of chunks matching the filters + - page_info: Pagination information + """ + # Validate sort parameters + valid_sort_columns = { + "created_at": "metadata->>'created_at'", + "updated_at": "metadata->>'updated_at'", + "chunk_order": "metadata->>'chunk_order'", + "text": "text", + } + + # Build the select clause + vector_select = ", vec" if include_vectors else "" + select_clause = f""" + id, document_id, owner_id, collection_ids, + text, metadata{vector_select}, COUNT(*) OVER() AS total + """ + + # Build the where clause if filters are provided + where_clause = "" + params: list[str | int | bytes] = [] + if filters: + where_clause = self._build_filters(filters, params) + where_clause = f"WHERE {where_clause}" + + # Construct the final query + query = f""" + SELECT {select_clause} + FROM {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} + {where_clause} + LIMIT $%s + OFFSET $%s + """ + + # Add pagination parameters + params.extend([limit, offset]) + param_indices = list(range(1, len(params) + 1)) + formatted_query = query % tuple(param_indices) + + # Execute the query + results = await self.connection_manager.fetch_query( + formatted_query, params + ) + + # Process results + chunks = [] + total = 0 + if results: + total = results[0].get("total", 0) + chunks = [ + { + "id": str(result["id"]), + "document_id": str(result["document_id"]), + "owner_id": str(result["owner_id"]), + "collection_ids": result["collection_ids"], + "text": result["text"], + "metadata": json.loads(result["metadata"]), + "vector": ( + json.loads(result["vec"]) if include_vectors else None + ), + } + for result in results + ] + + # Calculate pagination info + total_pages = (total + limit - 1) // limit if limit > 0 else 1 + current_page = (offset // limit) + 1 if limit > 0 else 1 + + page_info = { + "total_entries": total, + "total_pages": total_pages, + "current_page": current_page, + "limit": limit, + "offset": offset, + "has_previous": offset > 0, + "has_next": offset + limit < total, + "previous_offset": max(0, offset - limit) if offset > 0 else None, + "next_offset": offset + limit if offset + limit < total else None, + } + + return {"results": chunks, "page_info": page_info} + + async def search_documents( + self, + query_text: str, + settings: SearchSettings, + ) -> list[dict[str, Any]]: + """ + Search for documents based on their metadata fields and/or body text. + Joins with documents table to get complete document metadata. 
+ + Args: + query_text (str): The search query text + settings (SearchSettings): Search settings including search preferences and filters + + Returns: + list[dict[str, Any]]: List of documents with their search scores and complete metadata + """ + where_clauses = [] + params: list[str | int | bytes] = [query_text] + + # Build the dynamic metadata field search expression + metadata_fields_expr = " || ' ' || ".join( + [ + f"COALESCE(v.metadata->>{psql_quote_literal(key)}, '')" + for key in settings.metadata_keys # type: ignore + ] + ) + + query = f""" + WITH + -- Metadata search scores + metadata_scores AS ( + SELECT DISTINCT ON (v.document_id) + v.document_id, + d.metadata as doc_metadata, + CASE WHEN $1 = '' THEN 0.0 + ELSE + ts_rank_cd( + setweight(to_tsvector('english', {metadata_fields_expr}), 'A'), + websearch_to_tsquery('english', $1), + 32 + ) + END as metadata_rank + FROM {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} v + LEFT JOIN {self._get_table_name('documents')} d ON v.document_id = d.id + WHERE v.metadata IS NOT NULL + ), + -- Body search scores + body_scores AS ( + SELECT + document_id, + AVG( + ts_rank_cd( + setweight(to_tsvector('english', COALESCE(text, '')), 'B'), + websearch_to_tsquery('english', $1), + 32 + ) + ) as body_rank + FROM {self._get_table_name(PostgresChunkHandler.TABLE_NAME)} + WHERE $1 != '' + {f"AND to_tsvector('english', text) @@ websearch_to_tsquery('english', $1)" if settings.search_over_body else ""} + GROUP BY document_id + ), + -- Combined scores with document metadata + combined_scores AS ( + SELECT + COALESCE(m.document_id, b.document_id) as document_id, + m.doc_metadata as metadata, + COALESCE(m.metadata_rank, 0) as debug_metadata_rank, + COALESCE(b.body_rank, 0) as debug_body_rank, + CASE + WHEN {str(settings.search_over_metadata).lower()} AND {str(settings.search_over_body).lower()} THEN + COALESCE(m.metadata_rank, 0) * {settings.metadata_weight} + COALESCE(b.body_rank, 0) * {settings.title_weight} + WHEN {str(settings.search_over_metadata).lower()} THEN + COALESCE(m.metadata_rank, 0) + WHEN {str(settings.search_over_body).lower()} THEN + COALESCE(b.body_rank, 0) + ELSE 0 + END as rank + FROM metadata_scores m + FULL OUTER JOIN body_scores b ON m.document_id = b.document_id + WHERE ( + ($1 = '') OR + ({str(settings.search_over_metadata).lower()} AND m.metadata_rank > 0) OR + ({str(settings.search_over_body).lower()} AND b.body_rank > 0) + ) + """ + + # Add any additional filters + if settings.filters: + filter_clause = self._build_filters(settings.filters, params) + where_clauses.append(filter_clause) + + if where_clauses: + query += f" AND {' AND '.join(where_clauses)}" + + query += """ + ) + SELECT + document_id, + metadata, + rank as score, + debug_metadata_rank, + debug_body_rank + FROM combined_scores + WHERE rank > 0 + ORDER BY rank DESC + OFFSET ${offset_param} LIMIT ${limit_param} + """.format( + offset_param=len(params) + 1, + limit_param=len(params) + 2, + ) + + # Add offset and limit to params + params.extend([settings.offset, settings.limit]) + + # Execute query + results = await self.connection_manager.fetch_query(query, params) + + # Format results with complete document metadata + return [ + { + "document_id": str(r["document_id"]), + "metadata": ( + json.loads(r["metadata"]) + if isinstance(r["metadata"], str) + else r["metadata"] + ), + "score": float(r["score"]), + "debug_metadata_rank": float(r["debug_metadata_rank"]), + "debug_body_rank": float(r["debug_body_rank"]), + } + for r in results + ] + def 
_get_index_options( self, method: IndexMethod, - index_arguments: Optional[Union[IndexArgsIVFFlat, IndexArgsHNSW]], + index_arguments: Optional[IndexArgsIVFFlat | IndexArgsHNSW], ) -> str: if method == IndexMethod.ivfflat: if isinstance(index_arguments, IndexArgsIVFFlat): diff --git a/py/core/providers/email/__init__.py b/py/core/providers/email/__init__.py index 0755615e4..a4e4cc25e 100644 --- a/py/core/providers/email/__init__.py +++ b/py/core/providers/email/__init__.py @@ -1,6 +1,6 @@ from .console_mock import ConsoleMockEmailProvider -from .smtp import AsyncSMTPEmailProvider from .sendgrid import SendGridEmailProvider +from .smtp import AsyncSMTPEmailProvider __all__ = [ "ConsoleMockEmailProvider", diff --git a/py/core/providers/email/sendgrid.py b/py/core/providers/email/sendgrid.py index af2a27961..5ae5f2d3b 100644 --- a/py/core/providers/email/sendgrid.py +++ b/py/core/providers/email/sendgrid.py @@ -3,7 +3,8 @@ from typing import Optional from sendgrid import SendGridAPIClient -from sendgrid.helpers.mail import Mail, Content, From +from sendgrid.helpers.mail import Content, From, Mail + from core.base import EmailConfig, EmailProvider logger = logging.getLogger(__name__) diff --git a/py/core/providers/embeddings/litellm.py b/py/core/providers/embeddings/litellm.py index 8e6e579b2..2aab16ff3 100644 --- a/py/core/providers/embeddings/litellm.py +++ b/py/core/providers/embeddings/litellm.py @@ -9,11 +9,11 @@ from litellm import AuthenticationError, aembedding, embedding from core.base import ( + ChunkSearchResult, EmbeddingConfig, EmbeddingProvider, EmbeddingPurpose, R2RException, - VectorSearchResult, ) logger = logging.getLogger() @@ -193,7 +193,7 @@ def get_embeddings( def rerank( self, query: str, - results: list[VectorSearchResult], + results: list[ChunkSearchResult], stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, limit: int = 10, ): @@ -229,7 +229,7 @@ def rerank( copied_result.score = rank_info["score"] scored_results.append(copied_result) - # Return only the VectorSearchResult objects, limited to specified count + # Return only the ChunkSearchResult objects, limited to specified count return scored_results[:limit] except requests.RequestException as e: @@ -242,21 +242,21 @@ def rerank( async def arerank( self, query: str, - results: list[VectorSearchResult], + results: list[ChunkSearchResult], stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, limit: int = 10, - ) -> list[VectorSearchResult]: + ) -> list[ChunkSearchResult]: """ Asynchronously rerank search results using the configured rerank model. 
Args: query: The search query string - results: List of VectorSearchResult objects to rerank + results: List of ChunkSearchResult objects to rerank stage: The pipeline stage (must be RERANK) limit: Maximum number of results to return Returns: - List of reranked VectorSearchResult objects, limited to specified count + List of reranked ChunkSearchResult objects, limited to specified count """ if self.config.rerank_model is not None: if not self.rerank_url: @@ -291,7 +291,7 @@ async def arerank( copied_result.score = rank_info["score"] scored_results.append(copied_result) - # Return only the VectorSearchResult objects, limited to specified count + # Return only the ChunkSearchResult objects, limited to specified count return scored_results[:limit] except (ClientError, Exception) as e: diff --git a/py/core/providers/embeddings/ollama.py b/py/core/providers/embeddings/ollama.py index c054e846d..74e710378 100644 --- a/py/core/providers/embeddings/ollama.py +++ b/py/core/providers/embeddings/ollama.py @@ -1,15 +1,15 @@ import logging import os -from typing import Any, List +from typing import Any from ollama import AsyncClient, Client from core.base import ( + ChunkSearchResult, EmbeddingConfig, EmbeddingProvider, EmbeddingPurpose, R2RException, - VectorSearchResult, ) logger = logging.getLogger() @@ -51,7 +51,7 @@ def _get_embedding_kwargs(self, **kwargs): embedding_kwargs.update(kwargs) return embedding_kwargs - async def _execute_task(self, task: dict[str, Any]) -> List[List[float]]: + async def _execute_task(self, task: dict[str, Any]) -> list[list[float]]: texts = task["texts"] purpose = task.get("purpose", EmbeddingPurpose.INDEX) kwargs = self._get_embedding_kwargs(**task.get("kwargs", {})) @@ -73,7 +73,7 @@ async def _execute_task(self, task: dict[str, Any]) -> List[List[float]]: logger.error(error_msg) raise R2RException(error_msg, 400) - def _execute_task_sync(self, task: dict[str, Any]) -> List[List[float]]: + def _execute_task_sync(self, task: dict[str, Any]) -> list[list[float]]: texts = task["texts"] purpose = task.get("purpose", EmbeddingPurpose.INDEX) kwargs = self._get_embedding_kwargs(**task.get("kwargs", {})) @@ -99,7 +99,7 @@ async def async_get_embedding( stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, **kwargs, - ) -> List[float]: + ) -> list[float]: if stage != EmbeddingProvider.PipeStage.BASE: raise ValueError( "OllamaEmbeddingProvider only supports search stage." @@ -120,7 +120,7 @@ def get_embedding( stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, **kwargs, - ) -> List[float]: + ) -> list[float]: if stage != EmbeddingProvider.PipeStage.BASE: raise ValueError( "OllamaEmbeddingProvider only supports search stage." @@ -137,11 +137,11 @@ def get_embedding( async def async_get_embeddings( self, - texts: List[str], + texts: list[str], stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, **kwargs, - ) -> List[List[float]]: + ) -> list[list[float]]: if stage != EmbeddingProvider.PipeStage.BASE: raise ValueError( "OllamaEmbeddingProvider only supports search stage." 
@@ -157,11 +157,11 @@ async def async_get_embeddings( def get_embeddings( self, - texts: List[str], + texts: list[str], stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, **kwargs, - ) -> List[List[float]]: + ) -> list[list[float]]: if stage != EmbeddingProvider.PipeStage.BASE: raise ValueError( "OllamaEmbeddingProvider only supports search stage." @@ -178,16 +178,16 @@ def get_embeddings( def rerank( self, query: str, - results: list[VectorSearchResult], + results: list[ChunkSearchResult], stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, limit: int = 10, - ) -> list[VectorSearchResult]: + ) -> list[ChunkSearchResult]: return results[:limit] async def arerank( self, query: str, - results: list[VectorSearchResult], + results: list[ChunkSearchResult], stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, limit: int = 10, ): diff --git a/py/core/providers/embeddings/openai.py b/py/core/providers/embeddings/openai.py index 7aeb72ae6..83bd427ad 100644 --- a/py/core/providers/embeddings/openai.py +++ b/py/core/providers/embeddings/openai.py @@ -1,15 +1,15 @@ import logging import os -from typing import Any, List +from typing import Any from openai import AsyncOpenAI, AuthenticationError, OpenAI from openai._types import NOT_GIVEN from core.base import ( + ChunkSearchResult, EmbeddingConfig, EmbeddingProvider, EmbeddingPurpose, - VectorSearchResult, ) logger = logging.getLogger() @@ -96,7 +96,7 @@ def _get_embedding_kwargs(self, **kwargs): "dimensions": self._get_dimensions(), } | kwargs - async def _execute_task(self, task: dict[str, Any]) -> List[List[float]]: + async def _execute_task(self, task: dict[str, Any]) -> list[list[float]]: texts = task["texts"] kwargs = self._get_embedding_kwargs(**task.get("kwargs", {})) @@ -115,7 +115,7 @@ async def _execute_task(self, task: dict[str, Any]) -> List[List[float]]: logger.error(error_msg) raise ValueError(error_msg) from e - def _execute_task_sync(self, task: dict[str, Any]) -> List[List[float]]: + def _execute_task_sync(self, task: dict[str, Any]) -> list[list[float]]: texts = task["texts"] kwargs = self._get_embedding_kwargs(**task.get("kwargs", {})) try: @@ -139,7 +139,7 @@ async def async_get_embedding( stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, **kwargs, - ) -> List[float]: + ) -> list[float]: if stage != EmbeddingProvider.PipeStage.BASE: raise ValueError( "OpenAIEmbeddingProvider only supports search stage." @@ -160,7 +160,7 @@ def get_embedding( stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, **kwargs, - ) -> List[float]: + ) -> list[float]: if stage != EmbeddingProvider.PipeStage.BASE: raise ValueError( "OpenAIEmbeddingProvider only supports search stage." @@ -177,11 +177,11 @@ def get_embedding( async def async_get_embeddings( self, - texts: List[str], + texts: list[str], stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, **kwargs, - ) -> List[List[float]]: + ) -> list[list[float]]: if stage != EmbeddingProvider.PipeStage.BASE: raise ValueError( "OpenAIEmbeddingProvider only supports search stage." 
@@ -197,11 +197,11 @@ async def async_get_embeddings( def get_embeddings( self, - texts: List[str], + texts: list[str], stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, **kwargs, - ) -> List[List[float]]: + ) -> list[list[float]]: if stage != EmbeddingProvider.PipeStage.BASE: raise ValueError( "OpenAIEmbeddingProvider only supports search stage." @@ -218,7 +218,7 @@ def get_embeddings( def rerank( self, query: str, - results: list[VectorSearchResult], + results: list[ChunkSearchResult], stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, limit: int = 10, ): @@ -227,7 +227,7 @@ def rerank( async def arerank( self, query: str, - results: list[VectorSearchResult], + results: list[ChunkSearchResult], stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, limit: int = 10, ): diff --git a/py/core/providers/ingestion/r2r/base.py b/py/core/providers/ingestion/r2r/base.py index 5e071ab4d..42e0e6554 100644 --- a/py/core/providers/ingestion/r2r/base.py +++ b/py/core/providers/ingestion/r2r/base.py @@ -8,7 +8,7 @@ AsyncParser, ChunkingStrategy, Document, - DocumentExtraction, + DocumentChunk, DocumentType, IngestionConfig, IngestionProvider, @@ -16,7 +16,7 @@ RecursiveCharacterTextSplitter, TextSplitter, ) -from core.base.abstractions import DocumentExtraction +from core.base.abstractions import DocumentChunk from core.utils import generate_extraction_id from ...database import PostgresDBProvider @@ -169,7 +169,7 @@ def validate_config(self) -> bool: def chunk( self, - parsed_document: Union[str, DocumentExtraction], + parsed_document: Union[str, DocumentChunk], ingestion_config_override: dict, ) -> AsyncGenerator[Any, None]: @@ -178,7 +178,7 @@ def chunk( text_spliiter = self._build_text_splitter( ingestion_config_override ) - if isinstance(parsed_document, DocumentExtraction): + if isinstance(parsed_document, DocumentChunk): parsed_document = parsed_document.data if isinstance(parsed_document, str): @@ -198,7 +198,7 @@ async def parse( # type: ignore document: Document, ingestion_config_override: dict, ) -> AsyncGenerator[ - Union[DocumentExtraction, R2RDocumentProcessingError], None + Union[DocumentChunk, R2RDocumentProcessingError], None ]: if document.document_type not in self.parsers: yield R2RDocumentProcessingError( @@ -236,10 +236,10 @@ async def parse( # type: ignore iteration = 0 chunks = self.chunk(contents, ingestion_config_override) for chunk in chunks: - extraction = DocumentExtraction( + extraction = DocumentChunk( id=generate_extraction_id(document.id, iteration), document_id=document.id, - user_id=document.user_id, + owner_id=document.owner_id, collection_ids=document.collection_ids, data=chunk, metadata={**document.metadata, "chunk_order": iteration}, diff --git a/py/core/providers/ingestion/unstructured/base.py b/py/core/providers/ingestion/unstructured/base.py index 39e25f58f..b4dcb6abc 100644 --- a/py/core/providers/ingestion/unstructured/base.py +++ b/py/core/providers/ingestion/unstructured/base.py @@ -17,7 +17,7 @@ AsyncParser, ChunkingStrategy, Document, - DocumentExtraction, + DocumentChunk, DocumentType, RecursiveCharacterTextSplitter, ) @@ -216,7 +216,7 @@ async def parse( file_content: bytes, document: Document, ingestion_config_override: dict, - ) -> AsyncGenerator[DocumentExtraction, None]: + ) -> AsyncGenerator[DocumentChunk, None]: ingestion_config = copy( { @@ -343,10 +343,10 @@ async def parse( metadata["partitioned_by_unstructured"] = True 
metadata["chunk_order"] = iteration # creating the text extraction - yield DocumentExtraction( + yield DocumentChunk( id=generate_extraction_id(document.id, iteration), document_id=document.id, - user_id=document.user_id, + owner_id=document.owner_id, collection_ids=document.collection_ids, data=text, metadata=metadata, diff --git a/py/core/providers/logger/r2r_logger.py b/py/core/providers/logger/r2r_logger.py index 14aff8bdc..e46bac4ae 100644 --- a/py/core/providers/logger/r2r_logger.py +++ b/py/core/providers/logger/r2r_logger.py @@ -4,8 +4,9 @@ import logging import os import uuid +from contextlib import asynccontextmanager from datetime import datetime -from typing import Optional, Tuple, Union +from typing import Optional, Tuple from uuid import UUID from fastapi.responses import StreamingResponse @@ -17,6 +18,7 @@ RunInfoLog, RunType, ) +from shared.api.models.management.responses import MessageResponse logger = logging.getLogger() @@ -45,8 +47,9 @@ def __init__(self, config: PersistentLoggingConfig): ) async def initialize(self): - self.conn = await self.aiosqlite.connect(self.logging_path) - + """Initialize the database connection and tables.""" + if self.conn is None: + self.conn = await self.aiosqlite.connect(self.logging_path) await self.conn.execute( f""" CREATE TABLE IF NOT EXISTS {self.project_name}_{self.log_table} ( @@ -127,7 +130,7 @@ async def initialize(self): async def __aenter__(self): if self.conn is None: - await self._init() + await self.initialize() # Fixed incorrect _init() reference return self async def __aexit__(self, exc_type, exc_val, exc_tb): @@ -138,6 +141,21 @@ async def close(self): await self.conn.close() self.conn = None + @asynccontextmanager + async def savepoint(self, name: str): + """Create a savepoint with proper error handling.""" + if self.conn is None: + await self.initialize() + assert self.conn is not None + async with self.conn.cursor() as cursor: + await cursor.execute(f"SAVEPOINT {name}") + try: + yield + await cursor.execute(f"RELEASE SAVEPOINT {name}") + except Exception: + await cursor.execute(f"ROLLBACK TO SAVEPOINT {name}") + raise + async def log( self, run_id: UUID, @@ -247,7 +265,10 @@ async def create_conversation( ), ) await self.conn.commit() - return conversation_id + return { + "id": conversation_id, + "created_at": created_at, + } async def verify_conversation_access( self, conversation_id: str, user_id: UUID @@ -267,10 +288,10 @@ async def verify_conversation_access( async def get_conversations_overview( self, - conversation_ids: Optional[list[UUID]] = None, + offset: int, + limit: int, user_ids: Optional[UUID | list[UUID]] = None, - offset: int = 0, - limit: int = -1, + conversation_ids: Optional[list[UUID]] = None, ) -> dict[str, list[dict] | int]: """ Get conversations overview with pagination. 
@@ -336,7 +357,7 @@ async def get_conversations_overview( conversations = [ { - "conversation_id": row[0], + "id": row[0], "created_at": row[1], "user_id": UUID(row[2]) if row[2] else None, "name": row[3] or None, @@ -363,13 +384,18 @@ async def add_message( message_id = str(uuid.uuid4()) created_at = datetime.utcnow().timestamp() + # Serialize the message content to a JSON string + content_json = ( + content.model_dump_json() + ) # Use model_dump_json instead of json() + await self.conn.execute( "INSERT INTO messages (id, conversation_id, parent_id, content, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?)", ( message_id, conversation_id, parent_id, - content.json(), + content_json, created_at, json.dumps(metadata or {}), ), @@ -402,9 +428,9 @@ async def add_message( branch_id = str(uuid.uuid4()) await self.conn.execute( """ - INSERT INTO branches (id, conversation_id, branch_point_id) VALUES (?, ?, NULL) + INSERT INTO branches (id, conversation_id, branch_point_id, created_at) VALUES (?, ?, NULL, ?) """, - (branch_id, conversation_id), + (branch_id, conversation_id, created_at), ) await self.conn.execute( """ @@ -414,7 +440,10 @@ async def add_message( ) await self.conn.commit() - return message_id + return { + "id": message_id, + "message": content, + } async def edit_message( self, message_id: str, new_content: str @@ -456,13 +485,16 @@ async def edit_message( # Add the edited message with the same parent_id new_message_id = str(uuid.uuid4()) message_created_at = datetime.utcnow().timestamp() + + edited_message_json = edited_message.model_dump_json() + await self.conn.execute( "INSERT INTO messages (id, conversation_id, parent_id, content, created_at, metadata) VALUES (?, ?, ?, ?, ?, ?)", ( new_message_id, conversation_id, parent_id, - edited_message.json(), + edited_message_json, # Use the serialized JSON string message_created_at, json.dumps({"edited": True}), ), @@ -548,7 +580,7 @@ async def update_message_metadata( async def export_messages_to_csv( self, chunk_size: int = 1000, return_type: str = "stream" - ) -> Union[StreamingResponse, str]: + ) -> StreamingResponse | str: """ Export messages table to CSV format. @@ -629,12 +661,24 @@ async def generate_csv(): async def get_conversation( self, conversation_id: str, branch_id: Optional[str] = None - ) -> Tuple[str, list[Message]]: + ) -> list[MessageResponse]: if not self.conn: raise ValueError( "Initialize the connection pool before attempting to log." 
            )

+        # Get conversation details first
+        async with self.conn.execute(
+            "SELECT created_at FROM conversations WHERE id = ?",
+            (conversation_id,),
+        ) as cursor:
+            row = await cursor.fetchone()
+            if row is None:
+                raise ValueError(
+                    f"Conversation {conversation_id} not found"
+                )
+            conversation_created_at = row[0]
+
         if branch_id is None:
             # Get the most recent branch by created_at timestamp
             async with self.conn.execute(
@@ -649,8 +693,12 @@ async def get_conversation(
                 row = await cursor.fetchone()
                 branch_id = row[0] if row else None

+        # If no branch exists, return empty results but with required fields
         if branch_id is None:
-            return []  # No branches found for the conversation
+            return {
+                "id": conversation_id,
+                "created_at": conversation_created_at,
+            }

         # Get all messages for this branch
         async with self.conn.execute(
@@ -674,40 +722,62 @@ async def get_conversation(
         ) as cursor:
             rows = await cursor.fetchall()
             return [
-                (
-                    row[0],  # id
-                    Message.parse_raw(row[1]),  # message content
-                    json.loads(row[3]) if row[3] else {},  # metadata
+                MessageResponse(
+                    id=row[0],
+                    message=Message.parse_raw(row[1]),
+                    metadata=json.loads(row[3]) if row[3] else {},
                 )
                 for row in rows
             ]

-    async def get_branches_overview(self, conversation_id: str) -> list[dict]:
+    async def get_branches(
+        self,
+        offset: int,
+        limit: int,
+        conversation_id: str,
+    ) -> dict:
         if not self.conn:
             raise ValueError(
                 "Initialize the connection pool before attempting to log."
             )

+        query = """
+            WITH branch_data AS (
+                SELECT b.id, b.branch_point_id, m.content, b.created_at
+                FROM branches b
+                LEFT JOIN messages m ON b.branch_point_id = m.id
+                WHERE b.conversation_id = ?
+            ),
+            counted_branches AS (
+                SELECT *, COUNT(*) OVER() as total_entries
+                FROM branch_data
+            )
+            SELECT * FROM counted_branches
+            ORDER BY created_at DESC
+            LIMIT ? OFFSET ?
+        """
+
         async with self.conn.execute(
-            """
-            SELECT b.id, b.branch_point_id, m.content, b.created_at
-            FROM branches b
-            LEFT JOIN messages m ON b.branch_point_id = m.id
-            WHERE b.conversation_id = ?
-            ORDER BY b.created_at
-            """,
-            (conversation_id,),
+            query, (conversation_id, limit, offset)
         ) as cursor:
             rows = await cursor.fetchall()
-            return [
-                {
-                    "branch_id": row[0],
-                    "branch_point_id": row[1],
-                    "content": row[2],
-                    "created_at": row[3],
-                }
-                for row in rows
-            ]
+
+            if not rows:
+                return {"results": [], "total_entries": 0}
+
+            branches = [
+                {
+                    "branch_id": row[0],
+                    "branch_point_id": row[1],
+                    "content": row[2],
+                    "created_at": row[3],
+                }
+                for row in rows
+            ]
+
+            total_entries = rows[0][-1] if rows else 0
+
+            return {"results": branches, "total_entries": total_entries}

     async def get_next_branch(self, current_branch_id: str) -> Optional[str]:
         if not self.conn:
@@ -774,9 +844,10 @@ async def branch_at_message(self, message_id: str) -> str:

         # Create a new branch starting from message_id
         new_branch_id = str(uuid.uuid4())
+        created_at = datetime.utcnow().timestamp()
         await self.conn.execute(
-            "INSERT INTO branches (id, conversation_id, branch_point_id) VALUES (?, ?, ?)",
-            (new_branch_id, conversation_id, message_id),
+            "INSERT INTO branches (id, conversation_id, branch_point_id, created_at) VALUES (?, ?, ?, ?)",
+            (new_branch_id, conversation_id, message_id, created_at),
         )

         # Link ancestor messages to the new branch
@@ -797,13 +868,11 @@ async def branch_at_message(self, message_id: str) -> str:
         return new_branch_id

     async def delete_conversation(self, conversation_id: str):
-        if not self.conn:
-            raise ValueError(
-                "Initialize the connection pool before attempting to log."
- ) - - # Begin a transaction - async with self.conn.execute("BEGIN TRANSACTION"): + """Delete a conversation and all related data.""" + if self.conn is None: + await self.initialize() + try: + assert self.conn is not None # Delete all message branches associated with the conversation await self.conn.execute( "DELETE FROM message_branches WHERE message_id IN (SELECT id FROM messages WHERE conversation_id = ?)", @@ -823,8 +892,11 @@ async def delete_conversation(self, conversation_id: str): await self.conn.execute( "DELETE FROM conversations WHERE id = ?", (conversation_id,) ) - # Commit the transaction await self.conn.commit() + except Exception: + assert self.conn is not None + await self.conn.rollback() + raise async def get_logs( self, diff --git a/py/core/telemetry/events.py b/py/core/telemetry/events.py index 0151a5fcc..090028ea7 100644 --- a/py/core/telemetry/events.py +++ b/py/core/telemetry/events.py @@ -1,9 +1,9 @@ import uuid -from typing import Any, Dict, Optional +from typing import Any, Optional class BaseTelemetryEvent: - def __init__(self, event_type: str, properties: Dict[str, Any]): + def __init__(self, event_type: str, properties: dict[str, Any]): self.event_type = event_type self.properties = properties self.event_id = str(uuid.uuid4()) @@ -19,7 +19,7 @@ def __init__( self, user_id: str, feature: str, - properties: Optional[Dict[str, Any]] = None, + properties: Optional[dict[str, Any]] = None, ): super().__init__( "FeatureUsage", @@ -37,7 +37,7 @@ def __init__( user_id: str, endpoint: str, error_message: str, - properties: Optional[Dict[str, Any]] = None, + properties: Optional[dict[str, Any]] = None, ): super().__init__( "Error", @@ -55,7 +55,7 @@ def __init__( self, endpoint: str, latency: float, - properties: Optional[Dict[str, Any]] = None, + properties: Optional[dict[str, Any]] = None, ): super().__init__( "RequestLatency", @@ -72,7 +72,7 @@ def __init__( self, user_id: str, country: str, - properties: Optional[Dict[str, Any]] = None, + properties: Optional[dict[str, Any]] = None, ): super().__init__( "GeographicDistribution", @@ -89,7 +89,7 @@ def __init__( self, user_id: str, duration: float, - properties: Optional[Dict[str, Any]] = None, + properties: Optional[dict[str, Any]] = None, ): super().__init__( "SessionDuration", @@ -106,7 +106,7 @@ def __init__( self, user_id: str, path: str, - properties: Optional[Dict[str, Any]] = None, + properties: Optional[dict[str, Any]] = None, ): super().__init__( "UserPath", diff --git a/py/core/utils/__init__.py b/py/core/utils/__init__.py index 411d8c329..425c4aa88 100644 --- a/py/core/utils/__init__.py +++ b/py/core/utils/__init__.py @@ -1,14 +1,11 @@ from shared.utils.base_utils import ( decrement_version, - format_entity_types, - format_relations, format_search_results_for_llm, format_search_results_for_stream, - generate_collection_id_from_name, generate_default_user_collection_id, generate_document_id, generate_extraction_id, - generate_run_id, + generate_id, generate_user_id, increment_version, run_pipeline, @@ -22,14 +19,11 @@ ) __all__ = [ - "format_entity_types", - "format_relations", "format_search_results_for_stream", "format_search_results_for_llm", - "generate_run_id", + "generate_id", "generate_document_id", "generate_extraction_id", - "generate_collection_id_from_name", "generate_user_id", "increment_version", "decrement_version", diff --git a/py/migrations/versions/2fac23e4d91b_migrate_to_document_search.py b/py/migrations/versions/2fac23e4d91b_migrate_to_document_search.py index 697d19009..54e39ab08 100644 
--- a/py/migrations/versions/2fac23e4d91b_migrate_to_document_search.py
+++ b/py/migrations/versions/2fac23e4d91b_migrate_to_document_search.py
@@ -157,7 +157,7 @@ async def async_generate_all_summaries():
            summary_text = summary["results"]["choices"][0]["message"][
                "content"
            ]
-            embedding_vector = client.embedding(summary_text)["results"][0]
+            embedding_vector = await client.embedding(summary_text)
            # embedding_response = await openai_client.embeddings.create(
            #     model=embedding_model, input=summary_text, dimensions=dimension
            # )
@@ -218,14 +218,16 @@ def upgrade() -> None:
    if check_if_upgrade_needed():
        # Load the document summaries
        generate_all_summaries()
+        document_summaries = None
        try:
            with open("document_summaries.json", "r") as f:
                document_summaries = json.load(f)
                print(f"Loaded {len(document_summaries)} document summaries")
        except FileNotFoundError:
-            raise ValueError(
-                "document_summaries.json not found. Please run the summary generation script first."
+            print(
+                "document_summaries.json not found. Continuing without summaries and/or summary embeddings."
            )
+            pass
        except json.JSONDecodeError:
            raise ValueError("Invalid document_summaries.json file")

@@ -267,22 +269,27 @@ def upgrade() -> None:
            """
        )

-        # Update existing documents with summaries and embeddings
-        for doc_id, doc_data in document_summaries.items():
-            # Convert the embedding array to the PostgreSQL vector format
-            embedding_str = (
-                f"[{','.join(str(x) for x in doc_data['embedding'])}]"
-            )
-
-            # Use plain SQL with proper escaping for PostgreSQL
-            op.execute(
-                f"""
-                UPDATE {project_name}.document_info
-                SET
-                    summary = '{doc_data['summary'].replace("'", "''")}',
-                    summary_embedding = '{embedding_str}'::vector({dimension})
-                WHERE document_id = '{doc_id}'::uuid;
-                """
+        if document_summaries:
+            # Update existing documents with summaries and embeddings
+            for doc_id, doc_data in document_summaries.items():
+                # Convert the embedding array to the PostgreSQL vector format
+                embedding_str = (
+                    f"[{','.join(str(x) for x in doc_data['embedding'])}]"
+                )
+
+                # Use plain SQL with proper escaping for PostgreSQL
+                op.execute(
+                    f"""
+                    UPDATE {project_name}.document_info
+                    SET
+                        summary = '{doc_data['summary'].replace("'", "''")}',
+                        summary_embedding = '{embedding_str}'::vector({dimension})
+                    WHERE document_id = '{doc_id}'::uuid;
+                    """
+                )
+        else:
+            print(
+                "No document summaries found, skipping update of existing documents"
            )
diff --git a/py/migrations/versions/8077140e1e99_v3_api_database_revision.py b/py/migrations/versions/8077140e1e99_v3_api_database_revision.py
new file mode 100644
index 000000000..bc1b6c85e
--- /dev/null
+++ b/py/migrations/versions/8077140e1e99_v3_api_database_revision.py
@@ -0,0 +1,349 @@
+"""v3_api_database_revision
+
+Revision ID: 8077140e1e99
+Revises:
+Create Date: 2024-12-03 12:10:10.878485
+
+"""
+
+import os
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision: str = "8077140e1e99"
+down_revision: Union[str, None] = "2fac23e4d91b"
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+project_name = os.getenv("R2R_PROJECT_NAME")
+if not project_name:
+    raise ValueError(
+        "Environment variable `R2R_PROJECT_NAME` must be provided to migrate; it should be set equal to the value of `project_name` in your `r2r.toml`."
+    )
+
+if (
+    input(
+        "WARNING: This migration will delete all graph data. Are you sure you want to continue?
(yes/no) " + ).lower() + != "yes" +): + raise ValueError("Migration aborted.") + + +def upgrade() -> None: + + # Collections table migration + op.alter_column( + "collections", + "collection_id", + new_column_name="id", + schema=project_name, + ) + + op.drop_column( + "collections", + "kg_enrichment_status", + schema=project_name, + ) + + op.add_column( + "collections", + sa.Column( + "owner_id", + sa.UUID, + server_default=sa.text("'2acb499e-8428-543b-bd85-0d9098718220'"), + ), + schema=project_name, + ) + + op.add_column( + "collections", + sa.Column( + "graph_sync_status", sa.Text, server_default=sa.text("'pending'") + ), + schema=project_name, + ) + + op.add_column( + "collections", + sa.Column( + "graph_cluster_status", + sa.Text, + server_default=sa.text("'pending'"), + ), + schema=project_name, + ) + + # Documents table migration + op.rename_table( + "document_info", + "documents", + schema=project_name, + ) + + op.alter_column( + "documents", + "document_id", + new_column_name="id", + schema=project_name, + ) + + op.alter_column( + "documents", + "user_id", + new_column_name="owner_id", + schema=project_name, + ) + + op.drop_column( + "documents", + "kg_extraction_status", + schema=project_name, + ) + + op.add_column( + "documents", + sa.Column( + "extraction_status", + sa.Text, + server_default=sa.text("'pending'"), + ), + schema=project_name, + ) + + op.alter_column( + "documents", + "doc_search_vector", + new_column_name="raw_tsvector", + schema=project_name, + ) + + # Files table migration + op.rename_table( + "file_storage", + "files", + schema=project_name, + ) + + op.alter_column( + "files", + "file_name", + new_column_name="name", + schema=project_name, + ) + + op.alter_column( + "files", + "file_oid", + new_column_name="oid", + schema=project_name, + ) + + op.alter_column( + "files", + "file_size", + new_column_name="size", + schema=project_name, + ) + + op.alter_column( + "files", + "file_type", + new_column_name="type", + schema=project_name, + ) + + # Prompts table migration + op.alter_column( + "prompts", + "prompt_id", + new_column_name="id", + schema=project_name, + ) + + # Users table migration + op.alter_column( + "users", + "user_id", + new_column_name="id", + schema=project_name, + ) + + # Chunks table migration + op.rename_table( + "vectors", + "chunks", + schema=project_name, + ) + + op.alter_column( + "chunks", + "extraction_id", + new_column_name="id", + schema=project_name, + ) + + op.alter_column( + "chunks", + "user_id", + new_column_name="owner_id", + schema=project_name, + ) + + +def downgrade() -> None: + + # Collections table migration + op.alter_column( + "collections", + "id", + new_column_name="collection_id", + schema=project_name, + ) + + op.add_column( + "collections", + sa.Column( + "kg_enrichment_status", + sa.Text, + server_default=sa.text("'pending'"), + ), + schema=project_name, + ) + + op.drop_column( + "collections", + "owner_id", + schema=project_name, + ) + + op.drop_column( + "collections", + "graph_sync_status", + schema=project_name, + ) + + op.drop_column( + "collections", + "graph_cluster_status", + schema=project_name, + ) + + # Documents table migration + op.rename_table( + "documents", + "document_info", + schema=project_name, + ) + + op.alter_column( + "document_info", + "id", + new_column_name="document_id", + schema=project_name, + ) + + op.alter_column( + "document_info", + "owner_id", + new_column_name="user_id", + schema=project_name, + ) + + op.add_column( + "document_info", + sa.Column( + "kg_extraction_status", + 
sa.Text, + server_default=sa.text("'pending'"), + ), + schema=project_name, + ) + + op.drop_column( + "document_info", + "extraction_status", + schema=project_name, + ) + + op.alter_column( + "document_info", + "raw_tsvector", + new_column_name="doc_search_vector", + schema=project_name, + ) + + # Files table migration + op.rename_table( + "files", + "file_storage", + schema=project_name, + ) + + op.alter_column( + "file_storage", + "name", + new_column_name="file_name", + schema=project_name, + ) + + op.alter_column( + "file_storage", + "oid", + new_column_name="file_oid", + schema=project_name, + ) + + op.alter_column( + "file_storage", + "size", + new_column_name="file_size", + schema=project_name, + ) + + op.alter_column( + "file_storage", + "type", + new_column_name="file_type", + schema=project_name, + ) + + # Prompts table migration + op.alter_column( + "prompts", + "id", + new_column_name="prompt_id", + schema=project_name, + ) + + # Users table migration + op.alter_column( + "users", + "id", + new_column_name="user_id", + schema=project_name, + ) + + # Chunks table migration + op.rename_table( + "chunks", + "vectors", + schema=project_name, + ) + + op.alter_column( + "vectors", + "id", + new_column_name="extraction_id", + schema=project_name, + ) + + op.alter_column( + "vectors", + "owner_id", + new_column_name="user_id", + schema=project_name, + ) diff --git a/py/migrations/versions/d342e632358a_migrate_to_asyncpg.py b/py/migrations/versions/d342e632358a_migrate_to_asyncpg.py index d2a1a7013..01d4e04fe 100644 --- a/py/migrations/versions/d342e632358a_migrate_to_asyncpg.py +++ b/py/migrations/versions/d342e632358a_migrate_to_asyncpg.py @@ -186,7 +186,7 @@ def downgrade() -> None: f"ALTER TABLE IF EXISTS {project_name}.chunk_entity RENAME TO entity_raw" ) op.execute( - f"ALTER TABLE IF EXISTS {project_name}.chunk_triple RENAME TO triple_raw" + f"ALTER TABLE IF EXISTS {project_name}.chunk_relationship RENAME TO relationship_raw" ) op.execute( f"ALTER TABLE IF EXISTS {project_name}.document_entity RENAME TO entity_embedding" diff --git a/py/poetry.lock b/py/poetry.lock index f841c913f..8ff6cbb25 100644 --- a/py/poetry.lock +++ b/py/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. 
[[package]] name = "aiofiles" @@ -13,123 +13,109 @@ files = [ [[package]] name = "aiohappyeyeballs" -version = "2.4.3" +version = "2.4.4" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.8" files = [ - {file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"}, - {file = "aiohappyeyeballs-2.4.3.tar.gz", hash = "sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586"}, + {file = "aiohappyeyeballs-2.4.4-py3-none-any.whl", hash = "sha256:a980909d50efcd44795c4afeca523296716d50cd756ddca6af8c65b996e27de8"}, + {file = "aiohappyeyeballs-2.4.4.tar.gz", hash = "sha256:5fdd7d87889c63183afc18ce9271f9b0a7d32c2303e394468dd45d514a757745"}, ] [[package]] name = "aiohttp" -version = "3.10.10" +version = "3.11.9" description = "Async http client/server framework (asyncio)" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be7443669ae9c016b71f402e43208e13ddf00912f47f623ee5994e12fc7d4b3f"}, - {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b06b7843929e41a94ea09eb1ce3927865387e3e23ebe108e0d0d09b08d25be9"}, - {file = "aiohttp-3.10.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:333cf6cf8e65f6a1e06e9eb3e643a0c515bb850d470902274239fea02033e9a8"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:274cfa632350225ce3fdeb318c23b4a10ec25c0e2c880eff951a3842cf358ac1"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9e5e4a85bdb56d224f412d9c98ae4cbd032cc4f3161818f692cd81766eee65a"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b606353da03edcc71130b52388d25f9a30a126e04caef1fd637e31683033abd"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab5a5a0c7a7991d90446a198689c0535be89bbd6b410a1f9a66688f0880ec026"}, - {file = "aiohttp-3.10.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:578a4b875af3e0daaf1ac6fa983d93e0bbfec3ead753b6d6f33d467100cdc67b"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:8105fd8a890df77b76dd3054cddf01a879fc13e8af576805d667e0fa0224c35d"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3bcd391d083f636c06a68715e69467963d1f9600f85ef556ea82e9ef25f043f7"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fbc6264158392bad9df19537e872d476f7c57adf718944cc1e4495cbabf38e2a"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e48d5021a84d341bcaf95c8460b152cfbad770d28e5fe14a768988c461b821bc"}, - {file = "aiohttp-3.10.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2609e9ab08474702cc67b7702dbb8a80e392c54613ebe80db7e8dbdb79837c68"}, - {file = "aiohttp-3.10.10-cp310-cp310-win32.whl", hash = "sha256:84afcdea18eda514c25bc68b9af2a2b1adea7c08899175a51fe7c4fb6d551257"}, - {file = "aiohttp-3.10.10-cp310-cp310-win_amd64.whl", hash = "sha256:9c72109213eb9d3874f7ac8c0c5fa90e072d678e117d9061c06e30c85b4cf0e6"}, - {file = "aiohttp-3.10.10-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c30a0eafc89d28e7f959281b58198a9fa5e99405f716c0289b7892ca345fe45f"}, - {file = "aiohttp-3.10.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:258c5dd01afc10015866114e210fb7365f0d02d9d059c3c3415382ab633fcbcb"}, - {file = "aiohttp-3.10.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:15ecd889a709b0080f02721255b3f80bb261c2293d3c748151274dfea93ac871"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f3935f82f6f4a3820270842e90456ebad3af15810cf65932bd24da4463bc0a4c"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:413251f6fcf552a33c981c4709a6bba37b12710982fec8e558ae944bfb2abd38"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1720b4f14c78a3089562b8875b53e36b51c97c51adc53325a69b79b4b48ebcb"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:679abe5d3858b33c2cf74faec299fda60ea9de62916e8b67e625d65bf069a3b7"}, - {file = "aiohttp-3.10.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:79019094f87c9fb44f8d769e41dbb664d6e8fcfd62f665ccce36762deaa0e911"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:fe2fb38c2ed905a2582948e2de560675e9dfbee94c6d5ccdb1301c6d0a5bf092"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:a3f00003de6eba42d6e94fabb4125600d6e484846dbf90ea8e48a800430cc142"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:1bbb122c557a16fafc10354b9d99ebf2f2808a660d78202f10ba9d50786384b9"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:30ca7c3b94708a9d7ae76ff281b2f47d8eaf2579cd05971b5dc681db8caac6e1"}, - {file = "aiohttp-3.10.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:df9270660711670e68803107d55c2b5949c2e0f2e4896da176e1ecfc068b974a"}, - {file = "aiohttp-3.10.10-cp311-cp311-win32.whl", hash = "sha256:aafc8ee9b742ce75044ae9a4d3e60e3d918d15a4c2e08a6c3c3e38fa59b92d94"}, - {file = "aiohttp-3.10.10-cp311-cp311-win_amd64.whl", hash = "sha256:362f641f9071e5f3ee6f8e7d37d5ed0d95aae656adf4ef578313ee585b585959"}, - {file = "aiohttp-3.10.10-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:9294bbb581f92770e6ed5c19559e1e99255e4ca604a22c5c6397b2f9dd3ee42c"}, - {file = "aiohttp-3.10.10-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a8fa23fe62c436ccf23ff930149c047f060c7126eae3ccea005f0483f27b2e28"}, - {file = "aiohttp-3.10.10-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5c6a5b8c7926ba5d8545c7dd22961a107526562da31a7a32fa2456baf040939f"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:007ec22fbc573e5eb2fb7dec4198ef8f6bf2fe4ce20020798b2eb5d0abda6138"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9627cc1a10c8c409b5822a92d57a77f383b554463d1884008e051c32ab1b3742"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:50edbcad60d8f0e3eccc68da67f37268b5144ecc34d59f27a02f9611c1d4eec7"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a45d85cf20b5e0d0aa5a8dca27cce8eddef3292bc29d72dcad1641f4ed50aa16"}, - {file = "aiohttp-3.10.10-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0b00807e2605f16e1e198f33a53ce3c4523114059b0c09c337209ae55e3823a8"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:f2d4324a98062be0525d16f768a03e0bbb3b9fe301ceee99611dc9a7953124e6"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:438cd072f75bb6612f2aca29f8bd7cdf6e35e8f160bc312e49fbecab77c99e3a"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:baa42524a82f75303f714108fea528ccacf0386af429b69fff141ffef1c534f9"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a7d8d14fe962153fc681f6366bdec33d4356f98a3e3567782aac1b6e0e40109a"}, - {file = "aiohttp-3.10.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c1277cd707c465cd09572a774559a3cc7c7a28802eb3a2a9472588f062097205"}, - {file = "aiohttp-3.10.10-cp312-cp312-win32.whl", hash = "sha256:59bb3c54aa420521dc4ce3cc2c3fe2ad82adf7b09403fa1f48ae45c0cbde6628"}, - {file = "aiohttp-3.10.10-cp312-cp312-win_amd64.whl", hash = "sha256:0e1b370d8007c4ae31ee6db7f9a2fe801a42b146cec80a86766e7ad5c4a259cf"}, - {file = "aiohttp-3.10.10-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ad7593bb24b2ab09e65e8a1d385606f0f47c65b5a2ae6c551db67d6653e78c28"}, - {file = "aiohttp-3.10.10-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:1eb89d3d29adaf533588f209768a9c02e44e4baf832b08118749c5fad191781d"}, - {file = "aiohttp-3.10.10-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3fe407bf93533a6fa82dece0e74dbcaaf5d684e5a51862887f9eaebe6372cd79"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50aed5155f819873d23520919e16703fc8925e509abbb1a1491b0087d1cd969e"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f05e9727ce409358baa615dbeb9b969db94324a79b5a5cea45d39bdb01d82e6"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3dffb610a30d643983aeb185ce134f97f290f8935f0abccdd32c77bed9388b42"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aa6658732517ddabe22c9036479eabce6036655ba87a0224c612e1ae6af2087e"}, - {file = "aiohttp-3.10.10-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:741a46d58677d8c733175d7e5aa618d277cd9d880301a380fd296975a9cdd7bc"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e00e3505cd80440f6c98c6d69269dcc2a119f86ad0a9fd70bccc59504bebd68a"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ffe595f10566f8276b76dc3a11ae4bb7eba1aac8ddd75811736a15b0d5311414"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdfcf6443637c148c4e1a20c48c566aa694fa5e288d34b20fcdc58507882fed3"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d183cf9c797a5291e8301790ed6d053480ed94070637bfaad914dd38b0981f67"}, - {file = "aiohttp-3.10.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:77abf6665ae54000b98b3c742bc6ea1d1fb31c394bcabf8b5d2c1ac3ebfe7f3b"}, - {file = "aiohttp-3.10.10-cp313-cp313-win32.whl", hash = "sha256:4470c73c12cd9109db8277287d11f9dd98f77fc54155fc71a7738a83ffcc8ea8"}, - {file = "aiohttp-3.10.10-cp313-cp313-win_amd64.whl", hash = "sha256:486f7aabfa292719a2753c016cc3a8f8172965cabb3ea2e7f7436c7f5a22a151"}, - {file = "aiohttp-3.10.10-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:1b66ccafef7336a1e1f0e389901f60c1d920102315a56df85e49552308fc0486"}, - {file = "aiohttp-3.10.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = 
"sha256:acd48d5b80ee80f9432a165c0ac8cbf9253eaddb6113269a5e18699b33958dbb"}, - {file = "aiohttp-3.10.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3455522392fb15ff549d92fbf4b73b559d5e43dc522588f7eb3e54c3f38beee7"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45c3b868724137f713a38376fef8120c166d1eadd50da1855c112fe97954aed8"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:da1dee8948d2137bb51fbb8a53cce6b1bcc86003c6b42565f008438b806cccd8"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5ce2ce7c997e1971b7184ee37deb6ea9922ef5163c6ee5aa3c274b05f9e12fa"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:28529e08fde6f12eba8677f5a8608500ed33c086f974de68cc65ab218713a59d"}, - {file = "aiohttp-3.10.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f7db54c7914cc99d901d93a34704833568d86c20925b2762f9fa779f9cd2e70f"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:03a42ac7895406220124c88911ebee31ba8b2d24c98507f4a8bf826b2937c7f2"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:7e338c0523d024fad378b376a79faff37fafb3c001872a618cde1d322400a572"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:038f514fe39e235e9fef6717fbf944057bfa24f9b3db9ee551a7ecf584b5b480"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:64f6c17757251e2b8d885d728b6433d9d970573586a78b78ba8929b0f41d045a"}, - {file = "aiohttp-3.10.10-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:93429602396f3383a797a2a70e5f1de5df8e35535d7806c9f91df06f297e109b"}, - {file = "aiohttp-3.10.10-cp38-cp38-win32.whl", hash = "sha256:c823bc3971c44ab93e611ab1a46b1eafeae474c0c844aff4b7474287b75fe49c"}, - {file = "aiohttp-3.10.10-cp38-cp38-win_amd64.whl", hash = "sha256:54ca74df1be3c7ca1cf7f4c971c79c2daf48d9aa65dea1a662ae18926f5bc8ce"}, - {file = "aiohttp-3.10.10-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:01948b1d570f83ee7bbf5a60ea2375a89dfb09fd419170e7f5af029510033d24"}, - {file = "aiohttp-3.10.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9fc1500fd2a952c5c8e3b29aaf7e3cc6e27e9cfc0a8819b3bce48cc1b849e4cc"}, - {file = "aiohttp-3.10.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f614ab0c76397661b90b6851a030004dac502e48260ea10f2441abd2207fbcc7"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:00819de9e45d42584bed046314c40ea7e9aea95411b38971082cad449392b08c"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:05646ebe6b94cc93407b3bf34b9eb26c20722384d068eb7339de802154d61bc5"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:998f3bd3cfc95e9424a6acd7840cbdd39e45bc09ef87533c006f94ac47296090"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d9010c31cd6fa59438da4e58a7f19e4753f7f264300cd152e7f90d4602449762"}, - {file = "aiohttp-3.10.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ea7ffc6d6d6f8a11e6f40091a1040995cdff02cfc9ba4c2f30a516cb2633554"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:ef9c33cc5cbca35808f6c74be11eb7f5f6b14d2311be84a15b594bd3e58b5527"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ce0cdc074d540265bfeb31336e678b4e37316849d13b308607efa527e981f5c2"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:597a079284b7ee65ee102bc3a6ea226a37d2b96d0418cc9047490f231dc09fe8"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:7789050d9e5d0c309c706953e5e8876e38662d57d45f936902e176d19f1c58ab"}, - {file = "aiohttp-3.10.10-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e7f8b04d83483577fd9200461b057c9f14ced334dcb053090cea1da9c8321a91"}, - {file = "aiohttp-3.10.10-cp39-cp39-win32.whl", hash = "sha256:c02a30b904282777d872266b87b20ed8cc0d1501855e27f831320f471d54d983"}, - {file = "aiohttp-3.10.10-cp39-cp39-win_amd64.whl", hash = "sha256:edfe3341033a6b53a5c522c802deb2079eee5cbfbb0af032a55064bd65c73a23"}, - {file = "aiohttp-3.10.10.tar.gz", hash = "sha256:0631dd7c9f0822cc61c88586ca76d5b5ada26538097d0f1df510b082bad3411a"}, + {file = "aiohttp-3.11.9-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0411777249f25d11bd2964a230b3ffafcbed6cd65d0f2b132bc2b8f5b8c347c7"}, + {file = "aiohttp-3.11.9-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:499368eb904566fbdf1a3836a1532000ef1308f34a1bcbf36e6351904cced771"}, + {file = "aiohttp-3.11.9-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0b5a5009b0159a8f707879dc102b139466d8ec6db05103ec1520394fdd8ea02c"}, + {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:176f8bb8931da0613bb0ed16326d01330066bb1e172dd97e1e02b1c27383277b"}, + {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6435a66957cdba1a0b16f368bde03ce9c79c57306b39510da6ae5312a1a5b2c1"}, + {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:202f40fb686e5f93908eee0c75d1e6fbe50a43e9bd4909bf3bf4a56b560ca180"}, + {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39625703540feb50b6b7f938b3856d1f4886d2e585d88274e62b1bd273fae09b"}, + {file = "aiohttp-3.11.9-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c6beeac698671baa558e82fa160be9761cf0eb25861943f4689ecf9000f8ebd0"}, + {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:96726839a42429318017e67a42cca75d4f0d5248a809b3cc2e125445edd7d50d"}, + {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3f5461c77649358610fb9694e790956b4238ac5d9e697a17f63619c096469afe"}, + {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4313f3bc901255b22f01663eeeae167468264fdae0d32c25fc631d5d6e15b502"}, + {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:d6e274661c74195708fc4380a4ef64298926c5a50bb10fbae3d01627d7a075b7"}, + {file = "aiohttp-3.11.9-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:db2914de2559809fdbcf3e48f41b17a493b58cb7988d3e211f6b63126c55fe82"}, + {file = "aiohttp-3.11.9-cp310-cp310-win32.whl", hash = "sha256:27935716f8d62c1c73010428db310fd10136002cfc6d52b0ba7bdfa752d26066"}, + {file = "aiohttp-3.11.9-cp310-cp310-win_amd64.whl", hash = "sha256:afbe85b50ade42ddff5669947afde9e8a610e64d2c80be046d67ec4368e555fa"}, + {file = "aiohttp-3.11.9-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:afcda759a69c6a8be3aae764ec6733155aa4a5ad9aad4f398b52ba4037942fe3"}, + {file = 
"aiohttp-3.11.9-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c5bba6b83fde4ca233cfda04cbd4685ab88696b0c8eaf76f7148969eab5e248a"}, + {file = "aiohttp-3.11.9-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:442356e8924fe1a121f8c87866b0ecdc785757fd28924b17c20493961b3d6697"}, + {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f737fef6e117856400afee4f17774cdea392b28ecf058833f5eca368a18cf1bf"}, + {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea142255d4901b03f89cb6a94411ecec117786a76fc9ab043af8f51dd50b5313"}, + {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6e1e9e447856e9b7b3d38e1316ae9a8c92e7536ef48373de758ea055edfd5db5"}, + {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7f6173302f8a329ca5d1ee592af9e628d3ade87816e9958dcf7cdae2841def7"}, + {file = "aiohttp-3.11.9-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a7c6147c6306f537cff59409609508a1d2eff81199f0302dd456bb9e7ea50c39"}, + {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e9d036a9a41fc78e8a3f10a86c2fc1098fca8fab8715ba9eb999ce4788d35df0"}, + {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2ac9fd83096df36728da8e2f4488ac3b5602238f602706606f3702f07a13a409"}, + {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:d3108f0ad5c6b6d78eec5273219a5bbd884b4aacec17883ceefaac988850ce6e"}, + {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:96bbec47beb131bbf4bae05d8ef99ad9e5738f12717cfbbf16648b78b0232e87"}, + {file = "aiohttp-3.11.9-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:fc726c3fa8f606d07bd2b500e5dc4c0fd664c59be7788a16b9e34352c50b6b6b"}, + {file = "aiohttp-3.11.9-cp311-cp311-win32.whl", hash = "sha256:5720ebbc7a1b46c33a42d489d25d36c64c419f52159485e55589fbec648ea49a"}, + {file = "aiohttp-3.11.9-cp311-cp311-win_amd64.whl", hash = "sha256:17af09d963fa1acd7e4c280e9354aeafd9e3d47eaa4a6bfbd2171ad7da49f0c5"}, + {file = "aiohttp-3.11.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:c1f2d7fd583fc79c240094b3e7237d88493814d4b300d013a42726c35a734bc9"}, + {file = "aiohttp-3.11.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d4b8a1b6c7a68c73191f2ebd3bf66f7ce02f9c374e309bdb68ba886bbbf1b938"}, + {file = "aiohttp-3.11.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bd3f711f4c99da0091ced41dccdc1bcf8be0281dc314d6d9c6b6cf5df66f37a9"}, + {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44cb1a1326a0264480a789e6100dc3e07122eb8cd1ad6b784a3d47d13ed1d89c"}, + {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7a7ddf981a0b953ade1c2379052d47ccda2f58ab678fca0671c7c7ca2f67aac2"}, + {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6ffa45cc55b18d4ac1396d1ddb029f139b1d3480f1594130e62bceadf2e1a838"}, + {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cca505829cdab58c2495ff418c96092d225a1bbd486f79017f6de915580d3c44"}, + {file = "aiohttp-3.11.9-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44d323aa80a867cb6db6bebb4bbec677c6478e38128847f2c6b0f70eae984d72"}, + {file = 
"aiohttp-3.11.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b2fab23003c4bb2249729a7290a76c1dda38c438300fdf97d4e42bf78b19c810"}, + {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:be0c7c98e38a1e3ad7a6ff64af8b6d6db34bf5a41b1478e24c3c74d9e7f8ed42"}, + {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5cc5e0d069c56645446c45a4b5010d4b33ac6c5ebfd369a791b5f097e46a3c08"}, + {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:9bcf97b971289be69638d8b1b616f7e557e1342debc7fc86cf89d3f08960e411"}, + {file = "aiohttp-3.11.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c7333e7239415076d1418dbfb7fa4df48f3a5b00f8fdf854fca549080455bc14"}, + {file = "aiohttp-3.11.9-cp312-cp312-win32.whl", hash = "sha256:9384b07cfd3045b37b05ed002d1c255db02fb96506ad65f0f9b776b762a7572e"}, + {file = "aiohttp-3.11.9-cp312-cp312-win_amd64.whl", hash = "sha256:f5252ba8b43906f206048fa569debf2cd0da0316e8d5b4d25abe53307f573941"}, + {file = "aiohttp-3.11.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:282e0a7ddd36ebc411f156aeaa0491e8fe7f030e2a95da532cf0c84b0b70bc66"}, + {file = "aiohttp-3.11.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ebd3e6b0c7d4954cca59d241970011f8d3327633d555051c430bd09ff49dc494"}, + {file = "aiohttp-3.11.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:30f9f89ae625d412043f12ca3771b2ccec227cc93b93bb1f994db6e1af40a7d3"}, + {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7a3b5b2c012d70c63d9d13c57ed1603709a4d9d7d473e4a9dfece0e4ea3d5f51"}, + {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6ef1550bb5f55f71b97a6a395286db07f7f2c01c8890e613556df9a51da91e8d"}, + {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:317251b9c9a2f1a9ff9cd093775b34c6861d1d7df9439ce3d32a88c275c995cd"}, + {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21cbe97839b009826a61b143d3ca4964c8590d7aed33d6118125e5b71691ca46"}, + {file = "aiohttp-3.11.9-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:618b18c3a2360ac940a5503da14fa4f880c5b9bc315ec20a830357bcc62e6bae"}, + {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a0cf4d814689e58f57ecd5d8c523e6538417ca2e72ff52c007c64065cef50fb2"}, + {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:15c4e489942d987d5dac0ba39e5772dcbed4cc9ae3710d1025d5ba95e4a5349c"}, + {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:ec8df0ff5a911c6d21957a9182402aad7bf060eaeffd77c9ea1c16aecab5adbf"}, + {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:ed95d66745f53e129e935ad726167d3a6cb18c5d33df3165974d54742c373868"}, + {file = "aiohttp-3.11.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:647ec5bee7e4ec9f1034ab48173b5fa970d9a991e565549b965e93331f1328fe"}, + {file = "aiohttp-3.11.9-cp313-cp313-win32.whl", hash = "sha256:ef2c9499b7bd1e24e473dc1a85de55d72fd084eea3d8bdeec7ee0720decb54fa"}, + {file = "aiohttp-3.11.9-cp313-cp313-win_amd64.whl", hash = "sha256:84de955314aa5e8d469b00b14d6d714b008087a0222b0f743e7ffac34ef56aff"}, + {file = "aiohttp-3.11.9-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:e738aabff3586091221044b7a584865ddc4d6120346d12e28e788307cd731043"}, + {file = "aiohttp-3.11.9-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:28f29bce89c3b401a53d6fd4bee401ee943083bf2bdc12ef297c1d63155070b0"}, + {file = "aiohttp-3.11.9-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:31de2f10f63f96cc19e04bd2df9549559beadd0b2ee2da24a17e7ed877ca8c60"}, + {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77f31cebd8c27a36af6c7346055ac564946e562080ee1a838da724585c67474f"}, + {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0bcb7f6976dc0b6b56efde13294862adf68dd48854111b422a336fa729a82ea6"}, + {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a8b13b9950d8b2f8f58b6e5842c4b842b5887e2c32e3f4644d6642f1659a530"}, + {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9c23e62f3545c2216100603614f9e019e41b9403c47dd85b8e7e5015bf1bde0"}, + {file = "aiohttp-3.11.9-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec656680fc53a13f849c71afd0c84a55c536206d524cbc831cde80abbe80489e"}, + {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:36df00e0541f264ce42d62280281541a47474dfda500bc5b7f24f70a7f87be7a"}, + {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:8dcfd14c712aa9dd18049280bfb2f95700ff6a8bde645e09f17c3ed3f05a0130"}, + {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:14624d96f0d69cf451deed3173079a68c322279be6030208b045ab77e1e8d550"}, + {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:4b01d9cfcb616eeb6d40f02e66bebfe7b06d9f2ef81641fdd50b8dd981166e0b"}, + {file = "aiohttp-3.11.9-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:928f92f80e2e8d6567b87d3316c1fd9860ccfe36e87a9a7f5237d4cda8baa1ba"}, + {file = "aiohttp-3.11.9-cp39-cp39-win32.whl", hash = "sha256:c8a02f74ae419e3955af60f570d83187423e42e672a6433c5e292f1d23619269"}, + {file = "aiohttp-3.11.9-cp39-cp39-win_amd64.whl", hash = "sha256:0a97d657f6cf8782a830bb476c13f7d777cfcab8428ac49dde15c22babceb361"}, + {file = "aiohttp-3.11.9.tar.gz", hash = "sha256:a9266644064779840feec0e34f10a89b3ff1d2d6b751fe90017abcad1864fa7c"}, ] [package.dependencies] aiohappyeyeballs = ">=2.3.0" aiosignal = ">=1.1.2" -async-timeout = {version = ">=4.0,<5.0", markers = "python_version < \"3.11\""} +async-timeout = {version = ">=4.0,<6.0", markers = "python_version < \"3.11\""} attrs = ">=17.3.0" frozenlist = ">=1.1.1" multidict = ">=4.5,<7.0" -yarl = ">=1.12.0,<2.0" +propcache = ">=0.2.0" +yarl = ">=1.17.0,<2.0" [package.extras] speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] @@ -288,41 +274,40 @@ six = "*" [[package]] name = "apscheduler" -version = "3.10.4" +version = "3.11.0" description = "In-process task scheduler with Cron-like capabilities" optional = true -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "APScheduler-3.10.4-py3-none-any.whl", hash = "sha256:fb91e8a768632a4756a585f79ec834e0e27aad5860bac7eaa523d9ccefd87661"}, - {file = "APScheduler-3.10.4.tar.gz", hash = "sha256:e6df071b27d9be898e486bc7940a7be50b4af2e9da7c08f0744a96d4bd4cef4a"}, + {file = "APScheduler-3.11.0-py3-none-any.whl", hash = "sha256:fc134ca32e50f5eadcc4938e3a4545ab19131435e851abb40b34d63d5141c6da"}, + {file = "apscheduler-3.11.0.tar.gz", hash = "sha256:4c622d250b0955a65d5d0eb91c33e6d43fd879834bf541e0a18661ae60460133"}, ] [package.dependencies] -pytz = "*" -six = ">=1.4.0" -tzlocal = ">=2.0,<3.dev0 || >=4.dev0" +tzlocal = ">=3.0" [package.extras] -doc = 
["sphinx", "sphinx-rtd-theme"] +doc = ["packaging", "sphinx", "sphinx-rtd-theme (>=1.3.0)"] +etcd = ["etcd3", "protobuf (<=3.21.0)"] gevent = ["gevent"] mongodb = ["pymongo (>=3.0)"] redis = ["redis (>=3.0)"] rethinkdb = ["rethinkdb (>=2.4.0)"] sqlalchemy = ["sqlalchemy (>=1.4)"] -testing = ["pytest", "pytest-asyncio", "pytest-cov", "pytest-tornado5"] +test = ["APScheduler[etcd,mongodb,redis,rethinkdb,sqlalchemy,tornado,zookeeper]", "PySide6", "anyio (>=4.5.2)", "gevent", "pytest", "pytz", "twisted"] tornado = ["tornado (>=4.3)"] twisted = ["twisted"] zookeeper = ["kazoo"] [[package]] name = "async-timeout" -version = "4.0.3" +version = "5.0.1" description = "Timeout context manager for asyncio programs" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, - {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, + {file = "async_timeout-5.0.1-py3-none-any.whl", hash = "sha256:39e3809566ff85354557ec2398b55e096c8364bacac9405a7a1fa429e77fe76c"}, + {file = "async_timeout-5.0.1.tar.gz", hash = "sha256:d9321a7a3d5a6a5e187e824d2fa0793ce379a202935782d555d6e9d2735677d3"}, ] [[package]] @@ -446,38 +431,36 @@ files = [ [[package]] name = "bcrypt" -version = "4.2.0" +version = "4.2.1" description = "Modern password hashing for your software and your servers" optional = true python-versions = ">=3.7" files = [ - {file = "bcrypt-4.2.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:096a15d26ed6ce37a14c1ac1e48119660f21b24cba457f160a4b830f3fe6b5cb"}, - {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c02d944ca89d9b1922ceb8a46460dd17df1ba37ab66feac4870f6862a1533c00"}, - {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d84cf6d877918620b687b8fd1bf7781d11e8a0998f576c7aa939776b512b98d"}, - {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1bb429fedbe0249465cdd85a58e8376f31bb315e484f16e68ca4c786dcc04291"}, - {file = "bcrypt-4.2.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:655ea221910bcac76ea08aaa76df427ef8625f92e55a8ee44fbf7753dbabb328"}, - {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:1ee38e858bf5d0287c39b7a1fc59eec64bbf880c7d504d3a06a96c16e14058e7"}, - {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:0da52759f7f30e83f1e30a888d9163a81353ef224d82dc58eb5bb52efcabc399"}, - {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3698393a1b1f1fd5714524193849d0c6d524d33523acca37cd28f02899285060"}, - {file = "bcrypt-4.2.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:762a2c5fb35f89606a9fde5e51392dad0cd1ab7ae64149a8b935fe8d79dd5ed7"}, - {file = "bcrypt-4.2.0-cp37-abi3-win32.whl", hash = "sha256:5a1e8aa9b28ae28020a3ac4b053117fb51c57a010b9f969603ed885f23841458"}, - {file = "bcrypt-4.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:8f6ede91359e5df88d1f5c1ef47428a4420136f3ce97763e31b86dd8280fbdf5"}, - {file = "bcrypt-4.2.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:c52aac18ea1f4a4f65963ea4f9530c306b56ccd0c6f8c8da0c06976e34a6e841"}, - {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3bbbfb2734f0e4f37c5136130405332640a1e46e6b23e000eeff2ba8d005da68"}, - {file = 
"bcrypt-4.2.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3413bd60460f76097ee2e0a493ccebe4a7601918219c02f503984f0a7ee0aebe"}, - {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8d7bb9c42801035e61c109c345a28ed7e84426ae4865511eb82e913df18f58c2"}, - {file = "bcrypt-4.2.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3d3a6d28cb2305b43feac298774b997e372e56c7c7afd90a12b3dc49b189151c"}, - {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:9c1c4ad86351339c5f320ca372dfba6cb6beb25e8efc659bedd918d921956bae"}, - {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:27fe0f57bb5573104b5a6de5e4153c60814c711b29364c10a75a54bb6d7ff48d"}, - {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:8ac68872c82f1add6a20bd489870c71b00ebacd2e9134a8aa3f98a0052ab4b0e"}, - {file = "bcrypt-4.2.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:cb2a8ec2bc07d3553ccebf0746bbf3d19426d1c6d1adbd4fa48925f66af7b9e8"}, - {file = "bcrypt-4.2.0-cp39-abi3-win32.whl", hash = "sha256:77800b7147c9dc905db1cba26abe31e504d8247ac73580b4aa179f98e6608f34"}, - {file = "bcrypt-4.2.0-cp39-abi3-win_amd64.whl", hash = "sha256:61ed14326ee023917ecd093ee6ef422a72f3aec6f07e21ea5f10622b735538a9"}, - {file = "bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:39e1d30c7233cfc54f5c3f2c825156fe044efdd3e0b9d309512cc514a263ec2a"}, - {file = "bcrypt-4.2.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f4f4acf526fcd1c34e7ce851147deedd4e26e6402369304220250598b26448db"}, - {file = "bcrypt-4.2.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:1ff39b78a52cf03fdf902635e4c81e544714861ba3f0efc56558979dd4f09170"}, - {file = "bcrypt-4.2.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:373db9abe198e8e2c70d12b479464e0d5092cc122b20ec504097b5f2297ed184"}, - {file = "bcrypt-4.2.0.tar.gz", hash = "sha256:cf69eaf5185fd58f268f805b505ce31f9b9fc2d64b376642164e9244540c1221"}, + {file = "bcrypt-4.2.1-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:1340411a0894b7d3ef562fb233e4b6ed58add185228650942bdc885362f32c17"}, + {file = "bcrypt-4.2.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1ee315739bc8387aa36ff127afc99120ee452924e0df517a8f3e4c0187a0f5f"}, + {file = "bcrypt-4.2.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dbd0747208912b1e4ce730c6725cb56c07ac734b3629b60d4398f082ea718ad"}, + {file = "bcrypt-4.2.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:aaa2e285be097050dba798d537b6efd9b698aa88eef52ec98d23dcd6d7cf6fea"}, + {file = "bcrypt-4.2.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:76d3e352b32f4eeb34703370e370997065d28a561e4a18afe4fef07249cb4396"}, + {file = "bcrypt-4.2.1-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:b7703ede632dc945ed1172d6f24e9f30f27b1b1a067f32f68bf169c5f08d0425"}, + {file = "bcrypt-4.2.1-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:89df2aea2c43be1e1fa066df5f86c8ce822ab70a30e4c210968669565c0f4685"}, + {file = "bcrypt-4.2.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:04e56e3fe8308a88b77e0afd20bec516f74aecf391cdd6e374f15cbed32783d6"}, + {file = "bcrypt-4.2.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:cfdf3d7530c790432046c40cda41dfee8c83e29482e6a604f8930b9930e94139"}, + {file = "bcrypt-4.2.1-cp37-abi3-win32.whl", hash = "sha256:adadd36274510a01f33e6dc08f5824b97c9580583bd4487c564fc4617b328005"}, + {file = "bcrypt-4.2.1-cp37-abi3-win_amd64.whl", hash = 
"sha256:8c458cd103e6c5d1d85cf600e546a639f234964d0228909d8f8dbeebff82d526"}, + {file = "bcrypt-4.2.1-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:8ad2f4528cbf0febe80e5a3a57d7a74e6635e41af1ea5675282a33d769fba413"}, + {file = "bcrypt-4.2.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:909faa1027900f2252a9ca5dfebd25fc0ef1417943824783d1c8418dd7d6df4a"}, + {file = "bcrypt-4.2.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cde78d385d5e93ece5479a0a87f73cd6fa26b171c786a884f955e165032b262c"}, + {file = "bcrypt-4.2.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:533e7f3bcf2f07caee7ad98124fab7499cb3333ba2274f7a36cf1daee7409d99"}, + {file = "bcrypt-4.2.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:687cf30e6681eeda39548a93ce9bfbb300e48b4d445a43db4298d2474d2a1e54"}, + {file = "bcrypt-4.2.1-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:041fa0155c9004eb98a232d54da05c0b41d4b8e66b6fc3cb71b4b3f6144ba837"}, + {file = "bcrypt-4.2.1-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f85b1ffa09240c89aa2e1ae9f3b1c687104f7b2b9d2098da4e923f1b7082d331"}, + {file = "bcrypt-4.2.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c6f5fa3775966cca251848d4d5393ab016b3afed251163c1436fefdec3b02c84"}, + {file = "bcrypt-4.2.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:807261df60a8b1ccd13e6599c779014a362ae4e795f5c59747f60208daddd96d"}, + {file = "bcrypt-4.2.1-cp39-abi3-win32.whl", hash = "sha256:b588af02b89d9fad33e5f98f7838bf590d6d692df7153647724a7f20c186f6bf"}, + {file = "bcrypt-4.2.1-cp39-abi3-win_amd64.whl", hash = "sha256:e84e0e6f8e40a242b11bce56c313edc2be121cec3e0ec2d76fce01f6af33c07c"}, + {file = "bcrypt-4.2.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:76132c176a6d9953cdc83c296aeaed65e1a708485fd55abf163e0d9f8f16ce0e"}, + {file = "bcrypt-4.2.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e158009a54c4c8bc91d5e0da80920d048f918c61a581f0a63e4e93bb556d362f"}, + {file = "bcrypt-4.2.1.tar.gz", hash = "sha256:6765386e3ab87f569b276988742039baab087b2cdb01e809d74e74503c2faafe"}, ] [package.extras] @@ -571,17 +554,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.35.57" +version = "1.35.74" description = "The AWS SDK for Python" optional = true python-versions = ">=3.8" files = [ - {file = "boto3-1.35.57-py3-none-any.whl", hash = "sha256:9edf49640c79a05b0a72f4c2d1e24dfc164344b680535a645f455ac624dc3680"}, - {file = "boto3-1.35.57.tar.gz", hash = "sha256:db58348849a5af061f0f5ec9c3b699da5221ca83354059fdccb798e3ddb6b62a"}, + {file = "boto3-1.35.74-py3-none-any.whl", hash = "sha256:dab5bddbbe57dc707b6f6a1f25dc2823b8e234b6fe99fafef7fc406ab73031b9"}, + {file = "boto3-1.35.74.tar.gz", hash = "sha256:88370c6845ba71a4dae7f6b357099df29b3965da584be040c8e72c9902bc9492"}, ] [package.dependencies] -botocore = ">=1.35.57,<1.36.0" +botocore = ">=1.35.74,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -590,13 +573,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.57" +version = "1.35.74" description = "Low-level, data-driven core of boto 3." 
optional = true python-versions = ">=3.8" files = [ - {file = "botocore-1.35.57-py3-none-any.whl", hash = "sha256:92ddd02469213766872cb2399269dd20948f90348b42bf08379881d5e946cc34"}, - {file = "botocore-1.35.57.tar.gz", hash = "sha256:d96306558085baf0bcb3b022d7a8c39c93494f031edb376694d2b2dcd0e81327"}, + {file = "botocore-1.35.74-py3-none-any.whl", hash = "sha256:9ac9d33d84dd9f05b35085de081552342a2c9ae22e3c4ee105723c9e92c07bd9"}, + {file = "botocore-1.35.74.tar.gz", hash = "sha256:de5c4fa9a24cef3a758974857b5c5820a12fad345ebf33c052a5988e88f33634"}, ] [package.dependencies] @@ -850,21 +833,6 @@ files = [ [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} -[[package]] -name = "codecov" -version = "2.1.13" -description = "Hosted coverage reports for GitHub, Bitbucket and Gitlab" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "codecov-2.1.13-py2.py3-none-any.whl", hash = "sha256:c2ca5e51bba9ebb43644c43d0690148a55086f7f5e6fd36170858fa4206744d5"}, - {file = "codecov-2.1.13.tar.gz", hash = "sha256:2362b685633caeaf45b9951a9b76ce359cd3581dd515b430c6c3f5dfb4d92a8c"}, -] - -[package.dependencies] -coverage = "*" -requests = ">=2.7.9" - [[package]] name = "colorama" version = "0.4.6" @@ -878,76 +846,65 @@ files = [ [[package]] name = "contourpy" -version = "1.3.0" +version = "1.3.1" description = "Python library for calculating contours of 2D quadrilateral grids" optional = true -python-versions = ">=3.9" +python-versions = ">=3.10" files = [ - {file = "contourpy-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:880ea32e5c774634f9fcd46504bf9f080a41ad855f4fef54f5380f5133d343c7"}, - {file = "contourpy-1.3.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:76c905ef940a4474a6289c71d53122a4f77766eef23c03cd57016ce19d0f7b42"}, - {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:92f8557cbb07415a4d6fa191f20fd9d2d9eb9c0b61d1b2f52a8926e43c6e9af7"}, - {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36f965570cff02b874773c49bfe85562b47030805d7d8360748f3eca570f4cab"}, - {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cacd81e2d4b6f89c9f8a5b69b86490152ff39afc58a95af002a398273e5ce589"}, - {file = "contourpy-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69375194457ad0fad3a839b9e29aa0b0ed53bb54db1bfb6c3ae43d111c31ce41"}, - {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:7a52040312b1a858b5e31ef28c2e865376a386c60c0e248370bbea2d3f3b760d"}, - {file = "contourpy-1.3.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3faeb2998e4fcb256542e8a926d08da08977f7f5e62cf733f3c211c2a5586223"}, - {file = "contourpy-1.3.0-cp310-cp310-win32.whl", hash = "sha256:36e0cff201bcb17a0a8ecc7f454fe078437fa6bda730e695a92f2d9932bd507f"}, - {file = "contourpy-1.3.0-cp310-cp310-win_amd64.whl", hash = "sha256:87ddffef1dbe5e669b5c2440b643d3fdd8622a348fe1983fad7a0f0ccb1cd67b"}, - {file = "contourpy-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0fa4c02abe6c446ba70d96ece336e621efa4aecae43eaa9b030ae5fb92b309ad"}, - {file = "contourpy-1.3.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:834e0cfe17ba12f79963861e0f908556b2cedd52e1f75e6578801febcc6a9f49"}, - {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:dbc4c3217eee163fa3984fd1567632b48d6dfd29216da3ded3d7b844a8014a66"}, - {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4865cd1d419e0c7a7bf6de1777b185eebdc51470800a9f42b9e9decf17762081"}, - {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:303c252947ab4b14c08afeb52375b26781ccd6a5ccd81abcdfc1fafd14cf93c1"}, - {file = "contourpy-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:637f674226be46f6ba372fd29d9523dd977a291f66ab2a74fbeb5530bb3f445d"}, - {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:76a896b2f195b57db25d6b44e7e03f221d32fe318d03ede41f8b4d9ba1bff53c"}, - {file = "contourpy-1.3.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:e1fd23e9d01591bab45546c089ae89d926917a66dceb3abcf01f6105d927e2cb"}, - {file = "contourpy-1.3.0-cp311-cp311-win32.whl", hash = "sha256:d402880b84df3bec6eab53cd0cf802cae6a2ef9537e70cf75e91618a3801c20c"}, - {file = "contourpy-1.3.0-cp311-cp311-win_amd64.whl", hash = "sha256:6cb6cc968059db9c62cb35fbf70248f40994dfcd7aa10444bbf8b3faeb7c2d67"}, - {file = "contourpy-1.3.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:570ef7cf892f0afbe5b2ee410c507ce12e15a5fa91017a0009f79f7d93a1268f"}, - {file = "contourpy-1.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:da84c537cb8b97d153e9fb208c221c45605f73147bd4cadd23bdae915042aad6"}, - {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0be4d8425bfa755e0fd76ee1e019636ccc7c29f77a7c86b4328a9eb6a26d0639"}, - {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9c0da700bf58f6e0b65312d0a5e695179a71d0163957fa381bb3c1f72972537c"}, - {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:eb8b141bb00fa977d9122636b16aa67d37fd40a3d8b52dd837e536d64b9a4d06"}, - {file = "contourpy-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3634b5385c6716c258d0419c46d05c8aa7dc8cb70326c9a4fb66b69ad2b52e09"}, - {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0dce35502151b6bd35027ac39ba6e5a44be13a68f55735c3612c568cac3805fd"}, - {file = "contourpy-1.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:aea348f053c645100612b333adc5983d87be69acdc6d77d3169c090d3b01dc35"}, - {file = "contourpy-1.3.0-cp312-cp312-win32.whl", hash = "sha256:90f73a5116ad1ba7174341ef3ea5c3150ddf20b024b98fb0c3b29034752c8aeb"}, - {file = "contourpy-1.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:b11b39aea6be6764f84360fce6c82211a9db32a7c7de8fa6dd5397cf1d079c3b"}, - {file = "contourpy-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3e1c7fa44aaae40a2247e2e8e0627f4bea3dd257014764aa644f319a5f8600e3"}, - {file = "contourpy-1.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:364174c2a76057feef647c802652f00953b575723062560498dc7930fc9b1cb7"}, - {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:32b238b3b3b649e09ce9aaf51f0c261d38644bdfa35cbaf7b263457850957a84"}, - {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d51fca85f9f7ad0b65b4b9fe800406d0d77017d7270d31ec3fb1cc07358fdea0"}, - {file = "contourpy-1.3.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:732896af21716b29ab3e988d4ce14bc5133733b85956316fb0c56355f398099b"}, - {file = 
"contourpy-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d73f659398a0904e125280836ae6f88ba9b178b2fed6884f3b1f95b989d2c8da"}, - {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c6c7c2408b7048082932cf4e641fa3b8ca848259212f51c8c59c45aa7ac18f14"}, - {file = "contourpy-1.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f317576606de89da6b7e0861cf6061f6146ead3528acabff9236458a6ba467f8"}, - {file = "contourpy-1.3.0-cp313-cp313-win32.whl", hash = "sha256:31cd3a85dbdf1fc002280c65caa7e2b5f65e4a973fcdf70dd2fdcb9868069294"}, - {file = "contourpy-1.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:4553c421929ec95fb07b3aaca0fae668b2eb5a5203d1217ca7c34c063c53d087"}, - {file = "contourpy-1.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:345af746d7766821d05d72cb8f3845dfd08dd137101a2cb9b24de277d716def8"}, - {file = "contourpy-1.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3bb3808858a9dc68f6f03d319acd5f1b8a337e6cdda197f02f4b8ff67ad2057b"}, - {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:420d39daa61aab1221567b42eecb01112908b2cab7f1b4106a52caaec8d36973"}, - {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d63ee447261e963af02642ffcb864e5a2ee4cbfd78080657a9880b8b1868e18"}, - {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:167d6c890815e1dac9536dca00828b445d5d0df4d6a8c6adb4a7ec3166812fa8"}, - {file = "contourpy-1.3.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:710a26b3dc80c0e4febf04555de66f5fd17e9cf7170a7b08000601a10570bda6"}, - {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:75ee7cb1a14c617f34a51d11fa7524173e56551646828353c4af859c56b766e2"}, - {file = "contourpy-1.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:33c92cdae89ec5135d036e7218e69b0bb2851206077251f04a6c4e0e21f03927"}, - {file = "contourpy-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a11077e395f67ffc2c44ec2418cfebed032cd6da3022a94fc227b6faf8e2acb8"}, - {file = "contourpy-1.3.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e8134301d7e204c88ed7ab50028ba06c683000040ede1d617298611f9dc6240c"}, - {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e12968fdfd5bb45ffdf6192a590bd8ddd3ba9e58360b29683c6bb71a7b41edca"}, - {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fd2a0fc506eccaaa7595b7e1418951f213cf8255be2600f1ea1b61e46a60c55f"}, - {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4cfb5c62ce023dfc410d6059c936dcf96442ba40814aefbfa575425a3a7f19dc"}, - {file = "contourpy-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68a32389b06b82c2fdd68276148d7b9275b5f5cf13e5417e4252f6d1a34f72a2"}, - {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:94e848a6b83da10898cbf1311a815f770acc9b6a3f2d646f330d57eb4e87592e"}, - {file = "contourpy-1.3.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d78ab28a03c854a873787a0a42254a0ccb3cb133c672f645c9f9c8f3ae9d0800"}, - {file = "contourpy-1.3.0-cp39-cp39-win32.whl", hash = "sha256:81cb5ed4952aae6014bc9d0421dec7c5835c9c8c31cdf51910b708f548cf58e5"}, - {file = "contourpy-1.3.0-cp39-cp39-win_amd64.whl", hash = "sha256:14e262f67bd7e6eb6880bc564dcda30b15e351a594657e55b7eec94b6ef72843"}, - 
{file = "contourpy-1.3.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:fe41b41505a5a33aeaed2a613dccaeaa74e0e3ead6dd6fd3a118fb471644fd6c"}, - {file = "contourpy-1.3.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eca7e17a65f72a5133bdbec9ecf22401c62bcf4821361ef7811faee695799779"}, - {file = "contourpy-1.3.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:1ec4dc6bf570f5b22ed0d7efba0dfa9c5b9e0431aeea7581aa217542d9e809a4"}, - {file = "contourpy-1.3.0-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:00ccd0dbaad6d804ab259820fa7cb0b8036bda0686ef844d24125d8287178ce0"}, - {file = "contourpy-1.3.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ca947601224119117f7c19c9cdf6b3ab54c5726ef1d906aa4a69dfb6dd58102"}, - {file = "contourpy-1.3.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:c6ec93afeb848a0845a18989da3beca3eec2c0f852322efe21af1931147d12cb"}, - {file = "contourpy-1.3.0.tar.gz", hash = "sha256:7ffa0db17717a8ffb127efd0c95a4362d996b892c2904db72428d5b52e1938a4"}, + {file = "contourpy-1.3.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a045f341a77b77e1c5de31e74e966537bba9f3c4099b35bf4c2e3939dd54cdab"}, + {file = "contourpy-1.3.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:500360b77259914f7805af7462e41f9cb7ca92ad38e9f94d6c8641b089338124"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b2f926efda994cdf3c8d3fdb40b9962f86edbc4457e739277b961eced3d0b4c1"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:adce39d67c0edf383647a3a007de0a45fd1b08dedaa5318404f1a73059c2512b"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:abbb49fb7dac584e5abc6636b7b2a7227111c4f771005853e7d25176daaf8453"}, + {file = "contourpy-1.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0cffcbede75c059f535725c1680dfb17b6ba8753f0c74b14e6a9c68c29d7ea3"}, + {file = "contourpy-1.3.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ab29962927945d89d9b293eabd0d59aea28d887d4f3be6c22deaefbb938a7277"}, + {file = "contourpy-1.3.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:974d8145f8ca354498005b5b981165b74a195abfae9a8129df3e56771961d595"}, + {file = "contourpy-1.3.1-cp310-cp310-win32.whl", hash = "sha256:ac4578ac281983f63b400f7fe6c101bedc10651650eef012be1ccffcbacf3697"}, + {file = "contourpy-1.3.1-cp310-cp310-win_amd64.whl", hash = "sha256:174e758c66bbc1c8576992cec9599ce8b6672b741b5d336b5c74e35ac382b18e"}, + {file = "contourpy-1.3.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3e8b974d8db2c5610fb4e76307e265de0edb655ae8169e8b21f41807ccbeec4b"}, + {file = "contourpy-1.3.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:20914c8c973f41456337652a6eeca26d2148aa96dd7ac323b74516988bea89fc"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19d40d37c1c3a4961b4619dd9d77b12124a453cc3d02bb31a07d58ef684d3d86"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:113231fe3825ebf6f15eaa8bc1f5b0ddc19d42b733345eae0934cb291beb88b6"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4dbbc03a40f916a8420e420d63e96a1258d3d1b58cbdfd8d1f07b49fcbd38e85"}, + {file = "contourpy-1.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:3a04ecd68acbd77fa2d39723ceca4c3197cb2969633836ced1bea14e219d077c"}, + {file = "contourpy-1.3.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c414fc1ed8ee1dbd5da626cf3710c6013d3d27456651d156711fa24f24bd1291"}, + {file = "contourpy-1.3.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:31c1b55c1f34f80557d3830d3dd93ba722ce7e33a0b472cba0ec3b6535684d8f"}, + {file = "contourpy-1.3.1-cp311-cp311-win32.whl", hash = "sha256:f611e628ef06670df83fce17805c344710ca5cde01edfdc72751311da8585375"}, + {file = "contourpy-1.3.1-cp311-cp311-win_amd64.whl", hash = "sha256:b2bdca22a27e35f16794cf585832e542123296b4687f9fd96822db6bae17bfc9"}, + {file = "contourpy-1.3.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0ffa84be8e0bd33410b17189f7164c3589c229ce5db85798076a3fa136d0e509"}, + {file = "contourpy-1.3.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:805617228ba7e2cbbfb6c503858e626ab528ac2a32a04a2fe88ffaf6b02c32bc"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade08d343436a94e633db932e7e8407fe7de8083967962b46bdfc1b0ced39454"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:47734d7073fb4590b4a40122b35917cd77be5722d80683b249dac1de266aac80"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2ba94a401342fc0f8b948e57d977557fbf4d515f03c67682dd5c6191cb2d16ec"}, + {file = "contourpy-1.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efa874e87e4a647fd2e4f514d5e91c7d493697127beb95e77d2f7561f6905bd9"}, + {file = "contourpy-1.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:1bf98051f1045b15c87868dbaea84f92408337d4f81d0e449ee41920ea121d3b"}, + {file = "contourpy-1.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:61332c87493b00091423e747ea78200659dc09bdf7fd69edd5e98cef5d3e9a8d"}, + {file = "contourpy-1.3.1-cp312-cp312-win32.whl", hash = "sha256:e914a8cb05ce5c809dd0fe350cfbb4e881bde5e2a38dc04e3afe1b3e58bd158e"}, + {file = "contourpy-1.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:08d9d449a61cf53033612cb368f3a1b26cd7835d9b8cd326647efe43bca7568d"}, + {file = "contourpy-1.3.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a761d9ccfc5e2ecd1bf05534eda382aa14c3e4f9205ba5b1684ecfe400716ef2"}, + {file = "contourpy-1.3.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:523a8ee12edfa36f6d2a49407f705a6ef4c5098de4f498619787e272de93f2d5"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece6df05e2c41bd46776fbc712e0996f7c94e0d0543af1656956d150c4ca7c81"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:573abb30e0e05bf31ed067d2f82500ecfdaec15627a59d63ea2d95714790f5c2"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a9fa36448e6a3a1a9a2ba23c02012c43ed88905ec80163f2ffe2421c7192a5d7"}, + {file = "contourpy-1.3.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ea9924d28fc5586bf0b42d15f590b10c224117e74409dd7a0be3b62b74a501c"}, + {file = "contourpy-1.3.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b75aa69cb4d6f137b36f7eb2ace9280cfb60c55dc5f61c731fdf6f037f958a3"}, + {file = "contourpy-1.3.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:041b640d4ec01922083645a94bb3b2e777e6b626788f4095cf21abbe266413c1"}, + {file = "contourpy-1.3.1-cp313-cp313-win32.whl", hash = 
"sha256:36987a15e8ace5f58d4d5da9dca82d498c2bbb28dff6e5d04fbfcc35a9cb3a82"}, + {file = "contourpy-1.3.1-cp313-cp313-win_amd64.whl", hash = "sha256:a7895f46d47671fa7ceec40f31fae721da51ad34bdca0bee83e38870b1f47ffd"}, + {file = "contourpy-1.3.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9ddeb796389dadcd884c7eb07bd14ef12408aaae358f0e2ae24114d797eede30"}, + {file = "contourpy-1.3.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:19c1555a6801c2f084c7ddc1c6e11f02eb6a6016ca1318dd5452ba3f613a1751"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:841ad858cff65c2c04bf93875e384ccb82b654574a6d7f30453a04f04af71342"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4318af1c925fb9a4fb190559ef3eec206845f63e80fb603d47f2d6d67683901c"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:14c102b0eab282427b662cb590f2e9340a9d91a1c297f48729431f2dcd16e14f"}, + {file = "contourpy-1.3.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05e806338bfeaa006acbdeba0ad681a10be63b26e1b17317bfac3c5d98f36cda"}, + {file = "contourpy-1.3.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4d76d5993a34ef3df5181ba3c92fabb93f1eaa5729504fb03423fcd9f3177242"}, + {file = "contourpy-1.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:89785bb2a1980c1bd87f0cb1517a71cde374776a5f150936b82580ae6ead44a1"}, + {file = "contourpy-1.3.1-cp313-cp313t-win32.whl", hash = "sha256:8eb96e79b9f3dcadbad2a3891672f81cdcab7f95b27f28f1c67d75f045b6b4f1"}, + {file = "contourpy-1.3.1-cp313-cp313t-win_amd64.whl", hash = "sha256:287ccc248c9e0d0566934e7d606201abd74761b5703d804ff3df8935f523d546"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b457d6430833cee8e4b8e9b6f07aa1c161e5e0d52e118dc102c8f9bd7dd060d6"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cb76c1a154b83991a3cbbf0dfeb26ec2833ad56f95540b442c73950af2013750"}, + {file = "contourpy-1.3.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:44a29502ca9c7b5ba389e620d44f2fbe792b1fb5734e8b931ad307071ec58c53"}, + {file = "contourpy-1.3.1.tar.gz", hash = "sha256:dfd97abd83335045a913e3bcc4a09c0ceadbe66580cf573fe961f4a825efa699"}, ] [package.dependencies] @@ -962,73 +919,73 @@ test-no-images = ["pytest", "pytest-cov", "pytest-rerunfailures", "pytest-xdist" [[package]] name = "coverage" -version = "7.6.4" +version = "7.6.8" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" files = [ - {file = "coverage-7.6.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f8ae553cba74085db385d489c7a792ad66f7f9ba2ee85bfa508aeb84cf0ba07"}, - {file = "coverage-7.6.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8165b796df0bd42e10527a3f493c592ba494f16ef3c8b531288e3d0d72c1f6f0"}, - {file = "coverage-7.6.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c7c8b95bf47db6d19096a5e052ffca0a05f335bc63cef281a6e8fe864d450a72"}, - {file = "coverage-7.6.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ed9281d1b52628e81393f5eaee24a45cbd64965f41857559c2b7ff19385df51"}, - {file = "coverage-7.6.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0809082ee480bb8f7416507538243c8863ac74fd8a5d2485c46f0f7499f2b491"}, - {file 
= "coverage-7.6.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d541423cdd416b78626b55f123412fcf979d22a2c39fce251b350de38c15c15b"}, - {file = "coverage-7.6.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:58809e238a8a12a625c70450b48e8767cff9eb67c62e6154a642b21ddf79baea"}, - {file = "coverage-7.6.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c9b8e184898ed014884ca84c70562b4a82cbc63b044d366fedc68bc2b2f3394a"}, - {file = "coverage-7.6.4-cp310-cp310-win32.whl", hash = "sha256:6bd818b7ea14bc6e1f06e241e8234508b21edf1b242d49831831a9450e2f35fa"}, - {file = "coverage-7.6.4-cp310-cp310-win_amd64.whl", hash = "sha256:06babbb8f4e74b063dbaeb74ad68dfce9186c595a15f11f5d5683f748fa1d172"}, - {file = "coverage-7.6.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:73d2b73584446e66ee633eaad1a56aad577c077f46c35ca3283cd687b7715b0b"}, - {file = "coverage-7.6.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:51b44306032045b383a7a8a2c13878de375117946d68dcb54308111f39775a25"}, - {file = "coverage-7.6.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b3fb02fe73bed561fa12d279a417b432e5b50fe03e8d663d61b3d5990f29546"}, - {file = "coverage-7.6.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ed8fe9189d2beb6edc14d3ad19800626e1d9f2d975e436f84e19efb7fa19469b"}, - {file = "coverage-7.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b369ead6527d025a0fe7bd3864e46dbee3aa8f652d48df6174f8d0bac9e26e0e"}, - {file = "coverage-7.6.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:ade3ca1e5f0ff46b678b66201f7ff477e8fa11fb537f3b55c3f0568fbfe6e718"}, - {file = "coverage-7.6.4-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:27fb4a050aaf18772db513091c9c13f6cb94ed40eacdef8dad8411d92d9992db"}, - {file = "coverage-7.6.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4f704f0998911abf728a7783799444fcbbe8261c4a6c166f667937ae6a8aa522"}, - {file = "coverage-7.6.4-cp311-cp311-win32.whl", hash = "sha256:29155cd511ee058e260db648b6182c419422a0d2e9a4fa44501898cf918866cf"}, - {file = "coverage-7.6.4-cp311-cp311-win_amd64.whl", hash = "sha256:8902dd6a30173d4ef09954bfcb24b5d7b5190cf14a43170e386979651e09ba19"}, - {file = "coverage-7.6.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:12394842a3a8affa3ba62b0d4ab7e9e210c5e366fbac3e8b2a68636fb19892c2"}, - {file = "coverage-7.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b6b4c83d8e8ea79f27ab80778c19bc037759aea298da4b56621f4474ffeb117"}, - {file = "coverage-7.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d5b8007f81b88696d06f7df0cb9af0d3b835fe0c8dbf489bad70b45f0e45613"}, - {file = "coverage-7.6.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b57b768feb866f44eeed9f46975f3d6406380275c5ddfe22f531a2bf187eda27"}, - {file = "coverage-7.6.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5915fcdec0e54ee229926868e9b08586376cae1f5faa9bbaf8faf3561b393d52"}, - {file = "coverage-7.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0b58c672d14f16ed92a48db984612f5ce3836ae7d72cdd161001cc54512571f2"}, - {file = "coverage-7.6.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:2fdef0d83a2d08d69b1f2210a93c416d54e14d9eb398f6ab2f0a209433db19e1"}, - {file = "coverage-7.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = 
"sha256:8cf717ee42012be8c0cb205dbbf18ffa9003c4cbf4ad078db47b95e10748eec5"}, - {file = "coverage-7.6.4-cp312-cp312-win32.whl", hash = "sha256:7bb92c539a624cf86296dd0c68cd5cc286c9eef2d0c3b8b192b604ce9de20a17"}, - {file = "coverage-7.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:1032e178b76a4e2b5b32e19d0fd0abbce4b58e77a1ca695820d10e491fa32b08"}, - {file = "coverage-7.6.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:023bf8ee3ec6d35af9c1c6ccc1d18fa69afa1cb29eaac57cb064dbb262a517f9"}, - {file = "coverage-7.6.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b0ac3d42cb51c4b12df9c5f0dd2f13a4f24f01943627120ec4d293c9181219ba"}, - {file = "coverage-7.6.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8fe4984b431f8621ca53d9380901f62bfb54ff759a1348cd140490ada7b693c"}, - {file = "coverage-7.6.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5fbd612f8a091954a0c8dd4c0b571b973487277d26476f8480bfa4b2a65b5d06"}, - {file = "coverage-7.6.4-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dacbc52de979f2823a819571f2e3a350a7e36b8cb7484cdb1e289bceaf35305f"}, - {file = "coverage-7.6.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dab4d16dfef34b185032580e2f2f89253d302facba093d5fa9dbe04f569c4f4b"}, - {file = "coverage-7.6.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:862264b12ebb65ad8d863d51f17758b1684560b66ab02770d4f0baf2ff75da21"}, - {file = "coverage-7.6.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5beb1ee382ad32afe424097de57134175fea3faf847b9af002cc7895be4e2a5a"}, - {file = "coverage-7.6.4-cp313-cp313-win32.whl", hash = "sha256:bf20494da9653f6410213424f5f8ad0ed885e01f7e8e59811f572bdb20b8972e"}, - {file = "coverage-7.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:182e6cd5c040cec0a1c8d415a87b67ed01193ed9ad458ee427741c7d8513d963"}, - {file = "coverage-7.6.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a181e99301a0ae128493a24cfe5cfb5b488c4e0bf2f8702091473d033494d04f"}, - {file = "coverage-7.6.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:df57bdbeffe694e7842092c5e2e0bc80fff7f43379d465f932ef36f027179806"}, - {file = "coverage-7.6.4-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bcd1069e710600e8e4cf27f65c90c7843fa8edfb4520fb0ccb88894cad08b11"}, - {file = "coverage-7.6.4-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:99b41d18e6b2a48ba949418db48159d7a2e81c5cc290fc934b7d2380515bd0e3"}, - {file = "coverage-7.6.4-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a6b1e54712ba3474f34b7ef7a41e65bd9037ad47916ccb1cc78769bae324c01a"}, - {file = "coverage-7.6.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:53d202fd109416ce011578f321460795abfe10bb901b883cafd9b3ef851bacfc"}, - {file = "coverage-7.6.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:c48167910a8f644671de9f2083a23630fbf7a1cb70ce939440cd3328e0919f70"}, - {file = "coverage-7.6.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:cc8ff50b50ce532de2fa7a7daae9dd12f0a699bfcd47f20945364e5c31799fef"}, - {file = "coverage-7.6.4-cp313-cp313t-win32.whl", hash = "sha256:b8d3a03d9bfcaf5b0141d07a88456bb6a4c3ce55c080712fec8418ef3610230e"}, - {file = "coverage-7.6.4-cp313-cp313t-win_amd64.whl", hash = "sha256:f3ddf056d3ebcf6ce47bdaf56142af51bb7fad09e4af310241e9db7a3a8022e1"}, - {file = 
"coverage-7.6.4-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9cb7fa111d21a6b55cbf633039f7bc2749e74932e3aa7cb7333f675a58a58bf3"}, - {file = "coverage-7.6.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:11a223a14e91a4693d2d0755c7a043db43d96a7450b4f356d506c2562c48642c"}, - {file = "coverage-7.6.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a413a096c4cbac202433c850ee43fa326d2e871b24554da8327b01632673a076"}, - {file = "coverage-7.6.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:00a1d69c112ff5149cabe60d2e2ee948752c975d95f1e1096742e6077affd376"}, - {file = "coverage-7.6.4-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f76846299ba5c54d12c91d776d9605ae33f8ae2b9d1d3c3703cf2db1a67f2c0"}, - {file = "coverage-7.6.4-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:fe439416eb6380de434886b00c859304338f8b19f6f54811984f3420a2e03858"}, - {file = "coverage-7.6.4-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:0294ca37f1ba500667b1aef631e48d875ced93ad5e06fa665a3295bdd1d95111"}, - {file = "coverage-7.6.4-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6f01ba56b1c0e9d149f9ac85a2f999724895229eb36bd997b61e62999e9b0901"}, - {file = "coverage-7.6.4-cp39-cp39-win32.whl", hash = "sha256:bc66f0bf1d7730a17430a50163bb264ba9ded56739112368ba985ddaa9c3bd09"}, - {file = "coverage-7.6.4-cp39-cp39-win_amd64.whl", hash = "sha256:c481b47f6b5845064c65a7bc78bc0860e635a9b055af0df46fdf1c58cebf8e8f"}, - {file = "coverage-7.6.4-pp39.pp310-none-any.whl", hash = "sha256:3c65d37f3a9ebb703e710befdc489a38683a5b152242664b973a7b7b22348a4e"}, - {file = "coverage-7.6.4.tar.gz", hash = "sha256:29fc0f17b1d3fea332f8001d4558f8214af7f1d87a345f3a133c901d60347c73"}, + {file = "coverage-7.6.8-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b39e6011cd06822eb964d038d5dff5da5d98652b81f5ecd439277b32361a3a50"}, + {file = "coverage-7.6.8-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:63c19702db10ad79151a059d2d6336fe0c470f2e18d0d4d1a57f7f9713875dcf"}, + {file = "coverage-7.6.8-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3985b9be361d8fb6b2d1adc9924d01dec575a1d7453a14cccd73225cb79243ee"}, + {file = "coverage-7.6.8-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:644ec81edec0f4ad17d51c838a7d01e42811054543b76d4ba2c5d6af741ce2a6"}, + {file = "coverage-7.6.8-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f188a2402f8359cf0c4b1fe89eea40dc13b52e7b4fd4812450da9fcd210181d"}, + {file = "coverage-7.6.8-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e19122296822deafce89a0c5e8685704c067ae65d45e79718c92df7b3ec3d331"}, + {file = "coverage-7.6.8-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:13618bed0c38acc418896005732e565b317aa9e98d855a0e9f211a7ffc2d6638"}, + {file = "coverage-7.6.8-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:193e3bffca48ad74b8c764fb4492dd875038a2f9925530cb094db92bb5e47bed"}, + {file = "coverage-7.6.8-cp310-cp310-win32.whl", hash = "sha256:3988665ee376abce49613701336544041f2117de7b7fbfe91b93d8ff8b151c8e"}, + {file = "coverage-7.6.8-cp310-cp310-win_amd64.whl", hash = "sha256:f56f49b2553d7dd85fd86e029515a221e5c1f8cb3d9c38b470bc38bde7b8445a"}, + {file = "coverage-7.6.8-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:86cffe9c6dfcfe22e28027069725c7f57f4b868a3f86e81d1c62462764dc46d4"}, + {file = 
"coverage-7.6.8-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d82ab6816c3277dc962cfcdc85b1efa0e5f50fb2c449432deaf2398a2928ab94"}, + {file = "coverage-7.6.8-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13690e923a3932e4fad4c0ebfb9cb5988e03d9dcb4c5150b5fcbf58fd8bddfc4"}, + {file = "coverage-7.6.8-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4be32da0c3827ac9132bb488d331cb32e8d9638dd41a0557c5569d57cf22c9c1"}, + {file = "coverage-7.6.8-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44e6c85bbdc809383b509d732b06419fb4544dca29ebe18480379633623baafb"}, + {file = "coverage-7.6.8-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:768939f7c4353c0fac2f7c37897e10b1414b571fd85dd9fc49e6a87e37a2e0d8"}, + {file = "coverage-7.6.8-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e44961e36cb13c495806d4cac67640ac2866cb99044e210895b506c26ee63d3a"}, + {file = "coverage-7.6.8-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3ea8bb1ab9558374c0ab591783808511d135a833c3ca64a18ec927f20c4030f0"}, + {file = "coverage-7.6.8-cp311-cp311-win32.whl", hash = "sha256:629a1ba2115dce8bf75a5cce9f2486ae483cb89c0145795603d6554bdc83e801"}, + {file = "coverage-7.6.8-cp311-cp311-win_amd64.whl", hash = "sha256:fb9fc32399dca861584d96eccd6c980b69bbcd7c228d06fb74fe53e007aa8ef9"}, + {file = "coverage-7.6.8-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:e683e6ecc587643f8cde8f5da6768e9d165cd31edf39ee90ed7034f9ca0eefee"}, + {file = "coverage-7.6.8-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1defe91d41ce1bd44b40fabf071e6a01a5aa14de4a31b986aa9dfd1b3e3e414a"}, + {file = "coverage-7.6.8-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7ad66e8e50225ebf4236368cc43c37f59d5e6728f15f6e258c8639fa0dd8e6d"}, + {file = "coverage-7.6.8-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3fe47da3e4fda5f1abb5709c156eca207eacf8007304ce3019eb001e7a7204cb"}, + {file = "coverage-7.6.8-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:202a2d645c5a46b84992f55b0a3affe4f0ba6b4c611abec32ee88358db4bb649"}, + {file = "coverage-7.6.8-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4674f0daa1823c295845b6a740d98a840d7a1c11df00d1fd62614545c1583787"}, + {file = "coverage-7.6.8-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:74610105ebd6f33d7c10f8907afed696e79c59e3043c5f20eaa3a46fddf33b4c"}, + {file = "coverage-7.6.8-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:37cda8712145917105e07aab96388ae76e787270ec04bcb9d5cc786d7cbb8443"}, + {file = "coverage-7.6.8-cp312-cp312-win32.whl", hash = "sha256:9e89d5c8509fbd6c03d0dd1972925b22f50db0792ce06324ba069f10787429ad"}, + {file = "coverage-7.6.8-cp312-cp312-win_amd64.whl", hash = "sha256:379c111d3558272a2cae3d8e57e6b6e6f4fe652905692d54bad5ea0ca37c5ad4"}, + {file = "coverage-7.6.8-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b0c69f4f724c64dfbfe79f5dfb503b42fe6127b8d479b2677f2b227478db2eb"}, + {file = "coverage-7.6.8-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:c15b32a7aca8038ed7644f854bf17b663bc38e1671b5d6f43f9a2b2bd0c46f63"}, + {file = "coverage-7.6.8-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63068a11171e4276f6ece913bde059e77c713b48c3a848814a6537f35afb8365"}, + {file = 
"coverage-7.6.8-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6f4548c5ead23ad13fb7a2c8ea541357474ec13c2b736feb02e19a3085fac002"}, + {file = "coverage-7.6.8-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b4b4299dd0d2c67caaaf286d58aef5e75b125b95615dda4542561a5a566a1e3"}, + {file = "coverage-7.6.8-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c9ebfb2507751f7196995142f057d1324afdab56db1d9743aab7f50289abd022"}, + {file = "coverage-7.6.8-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:c1b4474beee02ede1eef86c25ad4600a424fe36cff01a6103cb4533c6bf0169e"}, + {file = "coverage-7.6.8-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d9fd2547e6decdbf985d579cf3fc78e4c1d662b9b0ff7cc7862baaab71c9cc5b"}, + {file = "coverage-7.6.8-cp313-cp313-win32.whl", hash = "sha256:8aae5aea53cbfe024919715eca696b1a3201886ce83790537d1c3668459c7146"}, + {file = "coverage-7.6.8-cp313-cp313-win_amd64.whl", hash = "sha256:ae270e79f7e169ccfe23284ff5ea2d52a6f401dc01b337efb54b3783e2ce3f28"}, + {file = "coverage-7.6.8-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:de38add67a0af869b0d79c525d3e4588ac1ffa92f39116dbe0ed9753f26eba7d"}, + {file = "coverage-7.6.8-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b07c25d52b1c16ce5de088046cd2432b30f9ad5e224ff17c8f496d9cb7d1d451"}, + {file = "coverage-7.6.8-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:62a66ff235e4c2e37ed3b6104d8b478d767ff73838d1222132a7a026aa548764"}, + {file = "coverage-7.6.8-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:09b9f848b28081e7b975a3626e9081574a7b9196cde26604540582da60235fdf"}, + {file = "coverage-7.6.8-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:093896e530c38c8e9c996901858ac63f3d4171268db2c9c8b373a228f459bbc5"}, + {file = "coverage-7.6.8-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9a7b8ac36fd688c8361cbc7bf1cb5866977ece6e0b17c34aa0df58bda4fa18a4"}, + {file = "coverage-7.6.8-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:38c51297b35b3ed91670e1e4efb702b790002e3245a28c76e627478aa3c10d83"}, + {file = "coverage-7.6.8-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2e4e0f60cb4bd7396108823548e82fdab72d4d8a65e58e2c19bbbc2f1e2bfa4b"}, + {file = "coverage-7.6.8-cp313-cp313t-win32.whl", hash = "sha256:6535d996f6537ecb298b4e287a855f37deaf64ff007162ec0afb9ab8ba3b8b71"}, + {file = "coverage-7.6.8-cp313-cp313t-win_amd64.whl", hash = "sha256:c79c0685f142ca53256722a384540832420dff4ab15fec1863d7e5bc8691bdcc"}, + {file = "coverage-7.6.8-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:3ac47fa29d8d41059ea3df65bd3ade92f97ee4910ed638e87075b8e8ce69599e"}, + {file = "coverage-7.6.8-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:24eda3a24a38157eee639ca9afe45eefa8d2420d49468819ac5f88b10de84f4c"}, + {file = "coverage-7.6.8-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4c81ed2820b9023a9a90717020315e63b17b18c274a332e3b6437d7ff70abe0"}, + {file = "coverage-7.6.8-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bd55f8fc8fa494958772a2a7302b0354ab16e0b9272b3c3d83cdb5bec5bd1779"}, + {file = "coverage-7.6.8-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f39e2f3530ed1626c66e7493be7a8423b023ca852aacdc91fb30162c350d2a92"}, + {file = "coverage-7.6.8-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:716a78a342679cd1177bc8c2fe957e0ab91405bd43a17094324845200b2fddf4"}, + {file = "coverage-7.6.8-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:177f01eeaa3aee4a5ffb0d1439c5952b53d5010f86e9d2667963e632e30082cc"}, + {file = "coverage-7.6.8-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:912e95017ff51dc3d7b6e2be158dedc889d9a5cc3382445589ce554f1a34c0ea"}, + {file = "coverage-7.6.8-cp39-cp39-win32.whl", hash = "sha256:4db3ed6a907b555e57cc2e6f14dc3a4c2458cdad8919e40b5357ab9b6db6c43e"}, + {file = "coverage-7.6.8-cp39-cp39-win_amd64.whl", hash = "sha256:428ac484592f780e8cd7b6b14eb568f7c85460c92e2a37cb0c0e5186e1a0d076"}, + {file = "coverage-7.6.8-pp39.pp310-none-any.whl", hash = "sha256:5c52a036535d12590c32c49209e79cabaad9f9ad8aa4cbd875b68c4d67a9cbce"}, + {file = "coverage-7.6.8.tar.gz", hash = "sha256:8b2b8503edb06822c86d82fa64a4a5cb0760bb8f31f26e138ec743f422f37cfc"}, ] [package.dependencies] @@ -1039,51 +996,53 @@ toml = ["tomli"] [[package]] name = "cryptography" -version = "43.0.3" +version = "44.0.0" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false -python-versions = ">=3.7" -files = [ - {file = "cryptography-43.0.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:bf7a1932ac4176486eab36a19ed4c0492da5d97123f1406cf15e41b05e787d2e"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63efa177ff54aec6e1c0aefaa1a241232dcd37413835a9b674b6e3f0ae2bfd3e"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e1ce50266f4f70bf41a2c6dc4358afadae90e2a1e5342d3c08883df1675374f"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:443c4a81bb10daed9a8f334365fe52542771f25aedaf889fd323a853ce7377d6"}, - {file = "cryptography-43.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:74f57f24754fe349223792466a709f8e0c093205ff0dca557af51072ff47ab18"}, - {file = "cryptography-43.0.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9762ea51a8fc2a88b70cf2995e5675b38d93bf36bd67d91721c309df184f49bd"}, - {file = "cryptography-43.0.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:81ef806b1fef6b06dcebad789f988d3b37ccaee225695cf3e07648eee0fc6b73"}, - {file = "cryptography-43.0.3-cp37-abi3-win32.whl", hash = "sha256:cbeb489927bd7af4aa98d4b261af9a5bc025bd87f0e3547e11584be9e9427be2"}, - {file = "cryptography-43.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:f46304d6f0c6ab8e52770addfa2fc41e6629495548862279641972b6215451cd"}, - {file = "cryptography-43.0.3-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:8ac43ae87929a5982f5948ceda07001ee5e83227fd69cf55b109144938d96984"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:846da004a5804145a5f441b8530b4bf35afbf7da70f82409f151695b127213d5"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0f996e7268af62598f2fc1204afa98a3b5712313a55c4c9d434aef49cadc91d4"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f7b178f11ed3664fd0e995a47ed2b5ff0a12d893e41dd0494f406d1cf555cab7"}, - {file = "cryptography-43.0.3-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:c2e6fc39c4ab499049df3bdf567f768a723a5e8464816e8f009f121a5a9f4405"}, - {file = 
"cryptography-43.0.3-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:e1be4655c7ef6e1bbe6b5d0403526601323420bcf414598955968c9ef3eb7d16"}, - {file = "cryptography-43.0.3-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:df6b6c6d742395dd77a23ea3728ab62f98379eff8fb61be2744d4679ab678f73"}, - {file = "cryptography-43.0.3-cp39-abi3-win32.whl", hash = "sha256:d56e96520b1020449bbace2b78b603442e7e378a9b3bd68de65c782db1507995"}, - {file = "cryptography-43.0.3-cp39-abi3-win_amd64.whl", hash = "sha256:0c580952eef9bf68c4747774cde7ec1d85a6e61de97281f2dba83c7d2c806362"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:d03b5621a135bffecad2c73e9f4deb1a0f977b9a8ffe6f8e002bf6c9d07b918c"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a2a431ee15799d6db9fe80c82b055bae5a752bef645bba795e8e52687c69efe3"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:281c945d0e28c92ca5e5930664c1cefd85efe80e5c0d2bc58dd63383fda29f83"}, - {file = "cryptography-43.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f18c716be16bc1fea8e95def49edf46b82fccaa88587a45f8dc0ff6ab5d8e0a7"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:4a02ded6cd4f0a5562a8887df8b3bd14e822a90f97ac5e544c162899bc467664"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:53a583b6637ab4c4e3591a15bc9db855b8d9dee9a669b550f311480acab6eb08"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1ec0bcf7e17c0c5669d881b1cd38c4972fade441b27bda1051665faaa89bdcaa"}, - {file = "cryptography-43.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2ce6fae5bdad59577b44e4dfed356944fbf1d925269114c28be377692643b4ff"}, - {file = "cryptography-43.0.3.tar.gz", hash = "sha256:315b9001266a492a6ff443b61238f956b214dbec9910a081ba5b6646a055a805"}, +python-versions = "!=3.9.0,!=3.9.1,>=3.7" +files = [ + {file = "cryptography-44.0.0-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:84111ad4ff3f6253820e6d3e58be2cc2a00adb29335d4cacb5ab4d4d34f2a123"}, + {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b15492a11f9e1b62ba9d73c210e2416724633167de94607ec6069ef724fad092"}, + {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:831c3c4d0774e488fdc83a1923b49b9957d33287de923d58ebd3cec47a0ae43f"}, + {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:761817a3377ef15ac23cd7834715081791d4ec77f9297ee694ca1ee9c2c7e5eb"}, + {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3c672a53c0fb4725a29c303be906d3c1fa99c32f58abe008a82705f9ee96f40b"}, + {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:4ac4c9f37eba52cb6fbeaf5b59c152ea976726b865bd4cf87883a7e7006cc543"}, + {file = "cryptography-44.0.0-cp37-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:60eb32934076fa07e4316b7b2742fa52cbb190b42c2df2863dbc4230a0a9b385"}, + {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ed3534eb1090483c96178fcb0f8893719d96d5274dfde98aa6add34614e97c8e"}, + {file = "cryptography-44.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f3f6fdfa89ee2d9d496e2c087cebef9d4fcbb0ad63c40e821b39f74bf48d9c5e"}, + {file = "cryptography-44.0.0-cp37-abi3-win32.whl", hash = "sha256:eb33480f1bad5b78233b0ad3e1b0be21e8ef1da745d8d2aecbb20671658b9053"}, + 
{file = "cryptography-44.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:abc998e0c0eee3c8a1904221d3f67dcfa76422b23620173e28c11d3e626c21bd"}, + {file = "cryptography-44.0.0-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:660cb7312a08bc38be15b696462fa7cc7cd85c3ed9c576e81f4dc4d8b2b31591"}, + {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1923cb251c04be85eec9fda837661c67c1049063305d6be5721643c22dd4e2b7"}, + {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:404fdc66ee5f83a1388be54300ae978b2efd538018de18556dde92575e05defc"}, + {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c5eb858beed7835e5ad1faba59e865109f3e52b3783b9ac21e7e47dc5554e289"}, + {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f53c2c87e0fb4b0c00fa9571082a057e37690a8f12233306161c8f4b819960b7"}, + {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:9e6fc8a08e116fb7c7dd1f040074c9d7b51d74a8ea40d4df2fc7aa08b76b9e6c"}, + {file = "cryptography-44.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:9abcc2e083cbe8dde89124a47e5e53ec38751f0d7dfd36801008f316a127d7ba"}, + {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:d2436114e46b36d00f8b72ff57e598978b37399d2786fd39793c36c6d5cb1c64"}, + {file = "cryptography-44.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a01956ddfa0a6790d594f5b34fc1bfa6098aca434696a03cfdbe469b8ed79285"}, + {file = "cryptography-44.0.0-cp39-abi3-win32.whl", hash = "sha256:eca27345e1214d1b9f9490d200f9db5a874479be914199194e746c893788d417"}, + {file = "cryptography-44.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:708ee5f1bafe76d041b53a4f95eb28cdeb8d18da17e597d46d7833ee59b97ede"}, + {file = "cryptography-44.0.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37d76e6863da3774cd9db5b409a9ecfd2c71c981c38788d3fcfaf177f447b731"}, + {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:f677e1268c4e23420c3acade68fac427fffcb8d19d7df95ed7ad17cdef8404f4"}, + {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:f5e7cb1e5e56ca0933b4873c0220a78b773b24d40d186b6738080b73d3d0a756"}, + {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:8b3e6eae66cf54701ee7d9c83c30ac0a1e3fa17be486033000f2a73a12ab507c"}, + {file = "cryptography-44.0.0-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:be4ce505894d15d5c5037167ffb7f0ae90b7be6f2a98f9a5c3442395501c32fa"}, + {file = "cryptography-44.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:62901fb618f74d7d81bf408c8719e9ec14d863086efe4185afd07c352aee1d2c"}, + {file = "cryptography-44.0.0.tar.gz", hash = "sha256:cd4e834f340b4293430701e772ec543b0fbe6c2dea510a5286fe0acabe153a02"}, ] [package.dependencies] cffi = {version = ">=1.12", markers = "platform_python_implementation != \"PyPy\""} [package.extras] -docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] -docstest = ["pyenchant (>=1.6.11)", "readme-renderer", "sphinxcontrib-spelling (>=4.0.1)"] -nox = ["nox"] -pep8test = ["check-sdist", "click", "mypy", "ruff"] -sdist = ["build"] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=3.0.0)"] +docstest = ["pyenchant (>=3)", "readme-renderer (>=30.0)", "sphinxcontrib-spelling (>=7.3.1)"] +nox = ["nox (>=2024.4.15)", "nox[uv] (>=2024.3.2)"] +pep8test = ["check-sdist", "click (>=8.0.1)", "mypy (>=1.4)", "ruff 
(>=0.3.6)"] +sdist = ["build (>=1.0.0)"] ssh = ["bcrypt (>=3.1.5)"] -test = ["certifi", "cryptography-vectors (==43.0.3)", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-xdist"] +test = ["certifi (>=2024)", "cryptography-vectors (==44.0.0)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] test-randomorder = ["pytest-randomly"] [[package]] @@ -1136,20 +1095,20 @@ optimize = ["orjson"] [[package]] name = "deprecated" -version = "1.2.14" +version = "1.2.15" description = "Python @deprecated decorator to deprecate old python classes, functions or methods." optional = true -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" files = [ - {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, - {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, + {file = "Deprecated-1.2.15-py2.py3-none-any.whl", hash = "sha256:353bc4a8ac4bfc96800ddab349d89c25dec1079f65fd53acdcc1e0b975b21320"}, + {file = "deprecated-1.2.15.tar.gz", hash = "sha256:683e561a90de76239796e6b6feac66b99030d2dd3fcf61ef996330f14bbb9b0d"}, ] [package.dependencies] wrapt = ">=1.10,<2" [package.extras] -dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "jinja2 (>=3.0.3,<3.1.0)", "setuptools", "sphinx (<2)", "tox"] [[package]] name = "deprecation" @@ -1297,22 +1256,6 @@ files = [ six = "*" termcolor = "*" -[[package]] -name = "flake8" -version = "6.1.0" -description = "the modular source code checker: pep8 pyflakes and co" -optional = false -python-versions = ">=3.8.1" -files = [ - {file = "flake8-6.1.0-py2.py3-none-any.whl", hash = "sha256:ffdfce58ea94c6580c77888a86506937f9a1a227dfcd15f245d694ae20a6b6e5"}, - {file = "flake8-6.1.0.tar.gz", hash = "sha256:d5b3857f07c030bdb5bf41c7f53799571d75c4491748a3adcd47de929e34cd23"}, -] - -[package.dependencies] -mccabe = ">=0.7.0,<0.8.0" -pycodestyle = ">=2.11.0,<2.12.0" -pyflakes = ">=3.1.0,<3.2.0" - [[package]] name = "flupy" version = "1.2.1" @@ -1331,59 +1274,61 @@ dev = ["black", "mypy", "pre-commit", "pylint", "pytest", "pytest-benchmark", "p [[package]] name = "fonttools" -version = "4.54.1" +version = "4.55.1" description = "Tools to manipulate font files" optional = true python-versions = ">=3.8" files = [ - {file = "fonttools-4.54.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7ed7ee041ff7b34cc62f07545e55e1468808691dddfd315d51dd82a6b37ddef2"}, - {file = "fonttools-4.54.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41bb0b250c8132b2fcac148e2e9198e62ff06f3cc472065dff839327945c5882"}, - {file = "fonttools-4.54.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7965af9b67dd546e52afcf2e38641b5be956d68c425bef2158e95af11d229f10"}, - {file = "fonttools-4.54.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:278913a168f90d53378c20c23b80f4e599dca62fbffae4cc620c8eed476b723e"}, - {file = "fonttools-4.54.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0e88e3018ac809b9662615072dcd6b84dca4c2d991c6d66e1970a112503bba7e"}, - {file = "fonttools-4.54.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4aa4817f0031206e637d1e685251ac61be64d1adef111060df84fdcbc6ab6c44"}, - {file = "fonttools-4.54.1-cp310-cp310-win32.whl", hash = 
"sha256:7e3b7d44e18c085fd8c16dcc6f1ad6c61b71ff463636fcb13df7b1b818bd0c02"}, - {file = "fonttools-4.54.1-cp310-cp310-win_amd64.whl", hash = "sha256:dd9cc95b8d6e27d01e1e1f1fae8559ef3c02c76317da650a19047f249acd519d"}, - {file = "fonttools-4.54.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5419771b64248484299fa77689d4f3aeed643ea6630b2ea750eeab219588ba20"}, - {file = "fonttools-4.54.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:301540e89cf4ce89d462eb23a89464fef50915255ece765d10eee8b2bf9d75b2"}, - {file = "fonttools-4.54.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76ae5091547e74e7efecc3cbf8e75200bc92daaeb88e5433c5e3e95ea8ce5aa7"}, - {file = "fonttools-4.54.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:82834962b3d7c5ca98cb56001c33cf20eb110ecf442725dc5fdf36d16ed1ab07"}, - {file = "fonttools-4.54.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d26732ae002cc3d2ecab04897bb02ae3f11f06dd7575d1df46acd2f7c012a8d8"}, - {file = "fonttools-4.54.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:58974b4987b2a71ee08ade1e7f47f410c367cdfc5a94fabd599c88165f56213a"}, - {file = "fonttools-4.54.1-cp311-cp311-win32.whl", hash = "sha256:ab774fa225238986218a463f3fe151e04d8c25d7de09df7f0f5fce27b1243dbc"}, - {file = "fonttools-4.54.1-cp311-cp311-win_amd64.whl", hash = "sha256:07e005dc454eee1cc60105d6a29593459a06321c21897f769a281ff2d08939f6"}, - {file = "fonttools-4.54.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:54471032f7cb5fca694b5f1a0aaeba4af6e10ae989df408e0216f7fd6cdc405d"}, - {file = "fonttools-4.54.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8fa92cb248e573daab8d032919623cc309c005086d743afb014c836636166f08"}, - {file = "fonttools-4.54.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a911591200114969befa7f2cb74ac148bce5a91df5645443371aba6d222e263"}, - {file = "fonttools-4.54.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93d458c8a6a354dc8b48fc78d66d2a8a90b941f7fec30e94c7ad9982b1fa6bab"}, - {file = "fonttools-4.54.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5eb2474a7c5be8a5331146758debb2669bf5635c021aee00fd7c353558fc659d"}, - {file = "fonttools-4.54.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c9c563351ddc230725c4bdf7d9e1e92cbe6ae8553942bd1fb2b2ff0884e8b714"}, - {file = "fonttools-4.54.1-cp312-cp312-win32.whl", hash = "sha256:fdb062893fd6d47b527d39346e0c5578b7957dcea6d6a3b6794569370013d9ac"}, - {file = "fonttools-4.54.1-cp312-cp312-win_amd64.whl", hash = "sha256:e4564cf40cebcb53f3dc825e85910bf54835e8a8b6880d59e5159f0f325e637e"}, - {file = "fonttools-4.54.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6e37561751b017cf5c40fce0d90fd9e8274716de327ec4ffb0df957160be3bff"}, - {file = "fonttools-4.54.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:357cacb988a18aace66e5e55fe1247f2ee706e01debc4b1a20d77400354cddeb"}, - {file = "fonttools-4.54.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8e953cc0bddc2beaf3a3c3b5dd9ab7554677da72dfaf46951e193c9653e515a"}, - {file = "fonttools-4.54.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:58d29b9a294573d8319f16f2f79e42428ba9b6480442fa1836e4eb89c4d9d61c"}, - {file = "fonttools-4.54.1-cp313-cp313-win32.whl", hash = "sha256:9ef1b167e22709b46bf8168368b7b5d3efeaaa746c6d39661c1b4405b6352e58"}, - {file = "fonttools-4.54.1-cp313-cp313-win_amd64.whl", hash = 
"sha256:262705b1663f18c04250bd1242b0515d3bbae177bee7752be67c979b7d47f43d"}, - {file = "fonttools-4.54.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:ed2f80ca07025551636c555dec2b755dd005e2ea8fbeb99fc5cdff319b70b23b"}, - {file = "fonttools-4.54.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:9dc080e5a1c3b2656caff2ac2633d009b3a9ff7b5e93d0452f40cd76d3da3b3c"}, - {file = "fonttools-4.54.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d152d1be65652fc65e695e5619e0aa0982295a95a9b29b52b85775243c06556"}, - {file = "fonttools-4.54.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8583e563df41fdecef31b793b4dd3af8a9caa03397be648945ad32717a92885b"}, - {file = "fonttools-4.54.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:0d1d353ef198c422515a3e974a1e8d5b304cd54a4c2eebcae708e37cd9eeffb1"}, - {file = "fonttools-4.54.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:fda582236fee135d4daeca056c8c88ec5f6f6d88a004a79b84a02547c8f57386"}, - {file = "fonttools-4.54.1-cp38-cp38-win32.whl", hash = "sha256:e7d82b9e56716ed32574ee106cabca80992e6bbdcf25a88d97d21f73a0aae664"}, - {file = "fonttools-4.54.1-cp38-cp38-win_amd64.whl", hash = "sha256:ada215fd079e23e060157aab12eba0d66704316547f334eee9ff26f8c0d7b8ab"}, - {file = "fonttools-4.54.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:f5b8a096e649768c2f4233f947cf9737f8dbf8728b90e2771e2497c6e3d21d13"}, - {file = "fonttools-4.54.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4e10d2e0a12e18f4e2dd031e1bf7c3d7017be5c8dbe524d07706179f355c5dac"}, - {file = "fonttools-4.54.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:31c32d7d4b0958600eac75eaf524b7b7cb68d3a8c196635252b7a2c30d80e986"}, - {file = "fonttools-4.54.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c39287f5c8f4a0c5a55daf9eaf9ccd223ea59eed3f6d467133cc727d7b943a55"}, - {file = "fonttools-4.54.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:a7a310c6e0471602fe3bf8efaf193d396ea561486aeaa7adc1f132e02d30c4b9"}, - {file = "fonttools-4.54.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:d3b659d1029946f4ff9b6183984578041b520ce0f8fb7078bb37ec7445806b33"}, - {file = "fonttools-4.54.1-cp39-cp39-win32.whl", hash = "sha256:e96bc94c8cda58f577277d4a71f51c8e2129b8b36fd05adece6320dd3d57de8a"}, - {file = "fonttools-4.54.1-cp39-cp39-win_amd64.whl", hash = "sha256:e8a4b261c1ef91e7188a30571be6ad98d1c6d9fa2427244c545e2fa0a2494dd7"}, - {file = "fonttools-4.54.1-py3-none-any.whl", hash = "sha256:37cddd62d83dc4f72f7c3f3c2bcf2697e89a30efb152079896544a93907733bd"}, - {file = "fonttools-4.54.1.tar.gz", hash = "sha256:957f669d4922f92c171ba01bef7f29410668db09f6c02111e22b2bce446f3285"}, + {file = "fonttools-4.55.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c17a6f9814f83772cd6d9c9009928e1afa4ab66210a31ced721556651075a9a0"}, + {file = "fonttools-4.55.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c4d14eecc814826a01db87a40af3407c892ba49996bc6e49961e386cd78b537c"}, + {file = "fonttools-4.55.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8589f9a15dc005592b94ecdc45b4dfae9bbe9e73542e89af5a5e776e745db83b"}, + {file = "fonttools-4.55.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bfee95bd9395bcd9e6c78955387554335109b6a613db71ef006020b42f761c58"}, + {file = "fonttools-4.55.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:34fa2ecc0bf1923d1a51bf2216a006de2c3c0db02c6aa1470ea50b62b8619bd5"}, + {file = 
"fonttools-4.55.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9c1c48483148bfb1b9ad951133ceea957faa004f6cb475b67e7bc75d482b48f8"}, + {file = "fonttools-4.55.1-cp310-cp310-win32.whl", hash = "sha256:3e2fc388ca7d023b3c45badd71016fd4185f93e51a22cfe4bd65378af7fba759"}, + {file = "fonttools-4.55.1-cp310-cp310-win_amd64.whl", hash = "sha256:c4c36c71f69d2b3ee30394b0986e5f8b2c461e7eff48dde49b08a90ded9fcdbd"}, + {file = "fonttools-4.55.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5daab3a55d460577f45bb8f5a8eca01fa6cde43ef2ab943b527991f54b735c41"}, + {file = "fonttools-4.55.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:acf1e80cf96c2fbc79e46f669d8713a9a79faaebcc68e31a9fbe600cf8027992"}, + {file = "fonttools-4.55.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e88a0329f7f88a210f09f79c088fb64f8032fc3ab65e2390a40b7d3a11773026"}, + {file = "fonttools-4.55.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03105b42259a8a94b2f0cbf1bee45f7a8a34e7b26c946a8fb89b4967e44091a8"}, + {file = "fonttools-4.55.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:9af3577e821649879ab5774ad0e060af34816af556c77c6d3820345d12bf415e"}, + {file = "fonttools-4.55.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:34bd5de3d0ad085359b79a96575cd6bd1bc2976320ef24a2aa152ead36dbf656"}, + {file = "fonttools-4.55.1-cp311-cp311-win32.whl", hash = "sha256:5da92c4b637f0155a41f345fa81143c8e17425260fcb21521cb2ad4d2cea2a95"}, + {file = "fonttools-4.55.1-cp311-cp311-win_amd64.whl", hash = "sha256:f70234253d15f844e6da1178f019a931f03181463ce0c7b19648b8c370527b07"}, + {file = "fonttools-4.55.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:9c372e527d58ba64b695f15f8014e97bc8826cf64d3380fc89b4196edd3c0fa8"}, + {file = "fonttools-4.55.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:845a967d3bef3245ba81fb5582dc731f6c2c8417fa211f1068c56893504bc000"}, + {file = "fonttools-4.55.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f03be82bcd4ba4418adf10e6165743f824bb09d6594c2743d7f93ea50968805b"}, + {file = "fonttools-4.55.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c42e935cf146f826f556d977660dac88f2fa3fb2efa27d5636c0b89a60c16edf"}, + {file = "fonttools-4.55.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:96328bf91e05621d8e40d9f854af7a262cb0e8313e9b38e7f3a7f3c4c0caaa8b"}, + {file = "fonttools-4.55.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:291acec4d774e8cd2d8472d88c04643a77a3324a15247951bd6cfc969799b69e"}, + {file = "fonttools-4.55.1-cp312-cp312-win32.whl", hash = "sha256:6d768d6632809aec1c3fa8f195b173386d85602334701a6894a601a4d3c80368"}, + {file = "fonttools-4.55.1-cp312-cp312-win_amd64.whl", hash = "sha256:2a3850afdb0be1f79a1e95340a2059226511675c5b68098d4e49bfbeb48a8aab"}, + {file = "fonttools-4.55.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:0c88d427eaf8bd8497b9051f56e0f5f9fb96a311aa7c72cda35e03e18d59cd16"}, + {file = "fonttools-4.55.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f062c95a725a79fd908fe8407b6ad63e230e1c7d6dece2d5d6ecaf843d6927f6"}, + {file = "fonttools-4.55.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f298c5324c45cad073475146bf560f4110ce2dc2488ff12231a343ec489f77bc"}, + {file = "fonttools-4.55.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7f06dbb71344ffd85a6cb7e27970a178952f0bdd8d319ed938e64ba4bcc41700"}, + {file = "fonttools-4.55.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4c46b3525166976f5855b1f039b02433dc51eb635fb54d6a111e0c5d6e6cdc4c"}, + {file = "fonttools-4.55.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:af46f52a21e086a2f89b87bd941c9f0f91e5f769e1a5eb3b37c912228814d3e5"}, + {file = "fonttools-4.55.1-cp313-cp313-win32.whl", hash = "sha256:cd7f36335c5725a3fd724cc667c10c3f5254e779bdc5bffefebb33cf5a75ecb1"}, + {file = "fonttools-4.55.1-cp313-cp313-win_amd64.whl", hash = "sha256:5d6394897710ccac7f74df48492d7f02b9586ff0588c66a2c218844e90534b22"}, + {file = "fonttools-4.55.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:52c4f4b383c56e1a4fe8dab1b63c2269ba9eab0695d2d8e033fa037e61e6f1ef"}, + {file = "fonttools-4.55.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:d83892dafdbd62b56545c77b6bd4fa49eef6ec1d6b95e042ee2c930503d1831e"}, + {file = "fonttools-4.55.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:604d5bf16f811fcaaaec2dde139f7ce958462487565edcd54b6fadacb2942083"}, + {file = "fonttools-4.55.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3324b92feb5fd084923a8e89a8248afd5b9f9d81ab9517d7b07cc84403bd448"}, + {file = "fonttools-4.55.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:30f8b1ca9b919c04850678d026fc330c19acaa9e3b282fcacc09a5eb3c8d20c3"}, + {file = "fonttools-4.55.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:1835c98df2cf28c86a66d234895c87df7b9325fd079a8019c5053a389ff55d23"}, + {file = "fonttools-4.55.1-cp38-cp38-win32.whl", hash = "sha256:9f202703720a7cc0049f2ed1a2047925e264384eb5cc4d34f80200d7b17f1b6a"}, + {file = "fonttools-4.55.1-cp38-cp38-win_amd64.whl", hash = "sha256:2efff20aed0338d37c2ff58766bd67f4b9607ded61cf3d6baf1b3e25ea74e119"}, + {file = "fonttools-4.55.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:3032d9bf010c395e6eca2851666cafb1f4ecde85d420188555e928ad0144326e"}, + {file = "fonttools-4.55.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0794055588c30ffe25426048e8a7c0a5271942727cd61fc939391e37f4d580d5"}, + {file = "fonttools-4.55.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:13ba980e3ffd3206b8c63a365f90dc10eeec27da946d5ee5373c3a325a46d77c"}, + {file = "fonttools-4.55.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5d7063babd7434a17a5e355e87de9b2306c85a5c19c7da0794be15c58aab0c39"}, + {file = "fonttools-4.55.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ed84c15144015a58ef550dd6312884c9fb31a2dbc31a6467bcdafd63be7db476"}, + {file = "fonttools-4.55.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:e89419d88b0bbfdb55209e03a17afa2d20db3c2fa0d785543c9d0875668195d5"}, + {file = "fonttools-4.55.1-cp39-cp39-win32.whl", hash = "sha256:6eb781e401b93cda99356bc043ababead2a5096550984d8a4ecf3d5c9f859dc2"}, + {file = "fonttools-4.55.1-cp39-cp39-win_amd64.whl", hash = "sha256:db1031acf04523c5a51c3e1ae19c21a1c32bc5f820a477dd4659a02f9cb82002"}, + {file = "fonttools-4.55.1-py3-none-any.whl", hash = "sha256:4bcfb11f90f48b48c366dd638d773a52fca0d1b9e056dc01df766bf5835baa08"}, + {file = "fonttools-4.55.1.tar.gz", hash = "sha256:85bb2e985718b0df96afc659abfe194c171726054314b019dbbfed31581673c7"}, ] [package.extras] @@ -1599,13 +1544,13 @@ test-win = ["POT", "pytest", "pytest-cov", "testfixtures"] [[package]] name = "gotrue" -version = "2.10.0" +version = "2.11.0" description = "Python Client Library for Supabase Auth" optional = true 
python-versions = "<4.0,>=3.9" files = [ - {file = "gotrue-2.10.0-py3-none-any.whl", hash = "sha256:768e58207488e5184ffbdc4351b7280d913daf97962f4e9f2cca05c80004b042"}, - {file = "gotrue-2.10.0.tar.gz", hash = "sha256:4edf4c251da3535f2b044e23deba221e848ca1210c17d0c7a9b19f79a1e3f3c0"}, + {file = "gotrue-2.11.0-py3-none-any.whl", hash = "sha256:62177ffd567448b352121bc7e9244ff018d59bb746dad476b51658f856d59cf8"}, + {file = "gotrue-2.11.0.tar.gz", hash = "sha256:a0a452748ef741337820c97b934327c25f796e7cd33c0bf4341346bcc5a837f5"}, ] [package.dependencies] @@ -1742,70 +1687,70 @@ test = ["objgraph", "psutil"] [[package]] name = "grpcio" -version = "1.67.1" +version = "1.68.1" description = "HTTP/2-based RPC framework" optional = true python-versions = ">=3.8" files = [ - {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, - {file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"}, - {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:43112046864317498a33bdc4797ae6a268c36345a910de9b9c17159d8346602f"}, - {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9b929f13677b10f63124c1a410994a401cdd85214ad83ab67cc077fc7e480f0"}, - {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7d1797a8a3845437d327145959a2c0c47c05947c9eef5ff1a4c80e499dcc6fa"}, - {file = "grpcio-1.67.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:0489063974d1452436139501bf6b180f63d4977223ee87488fe36858c5725292"}, - {file = "grpcio-1.67.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9fd042de4a82e3e7aca44008ee2fb5da01b3e5adb316348c21980f7f58adc311"}, - {file = "grpcio-1.67.1-cp310-cp310-win32.whl", hash = "sha256:638354e698fd0c6c76b04540a850bf1db27b4d2515a19fcd5cf645c48d3eb1ed"}, - {file = "grpcio-1.67.1-cp310-cp310-win_amd64.whl", hash = "sha256:608d87d1bdabf9e2868b12338cd38a79969eaf920c89d698ead08f48de9c0f9e"}, - {file = "grpcio-1.67.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:7818c0454027ae3384235a65210bbf5464bd715450e30a3d40385453a85a70cb"}, - {file = "grpcio-1.67.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ea33986b70f83844cd00814cee4451055cd8cab36f00ac64a31f5bb09b31919e"}, - {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:c7a01337407dd89005527623a4a72c5c8e2894d22bead0895306b23c6695698f"}, - {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80b866f73224b0634f4312a4674c1be21b2b4afa73cb20953cbbb73a6b36c3cc"}, - {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9fff78ba10d4250bfc07a01bd6254a6d87dc67f9627adece85c0b2ed754fa96"}, - {file = "grpcio-1.67.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8a23cbcc5bb11ea7dc6163078be36c065db68d915c24f5faa4f872c573bb400f"}, - {file = "grpcio-1.67.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1a65b503d008f066e994f34f456e0647e5ceb34cfcec5ad180b1b44020ad4970"}, - {file = "grpcio-1.67.1-cp311-cp311-win32.whl", hash = "sha256:e29ca27bec8e163dca0c98084040edec3bc49afd10f18b412f483cc68c712744"}, - {file = "grpcio-1.67.1-cp311-cp311-win_amd64.whl", hash = "sha256:786a5b18544622bfb1e25cc08402bd44ea83edfb04b93798d85dca4d1a0b5be5"}, - {file = "grpcio-1.67.1-cp312-cp312-linux_armv7l.whl", hash = 
"sha256:267d1745894200e4c604958da5f856da6293f063327cb049a51fe67348e4f953"}, - {file = "grpcio-1.67.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:85f69fdc1d28ce7cff8de3f9c67db2b0ca9ba4449644488c1e0303c146135ddb"}, - {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f26b0b547eb8d00e195274cdfc63ce64c8fc2d3e2d00b12bf468ece41a0423a0"}, - {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4422581cdc628f77302270ff839a44f4c24fdc57887dc2a45b7e53d8fc2376af"}, - {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d7616d2ded471231c701489190379e0c311ee0a6c756f3c03e6a62b95a7146e"}, - {file = "grpcio-1.67.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8a00efecde9d6fcc3ab00c13f816313c040a28450e5e25739c24f432fc6d3c75"}, - {file = "grpcio-1.67.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:699e964923b70f3101393710793289e42845791ea07565654ada0969522d0a38"}, - {file = "grpcio-1.67.1-cp312-cp312-win32.whl", hash = "sha256:4e7b904484a634a0fff132958dabdb10d63e0927398273917da3ee103e8d1f78"}, - {file = "grpcio-1.67.1-cp312-cp312-win_amd64.whl", hash = "sha256:5721e66a594a6c4204458004852719b38f3d5522082be9061d6510b455c90afc"}, - {file = "grpcio-1.67.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:aa0162e56fd10a5547fac8774c4899fc3e18c1aa4a4759d0ce2cd00d3696ea6b"}, - {file = "grpcio-1.67.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:beee96c8c0b1a75d556fe57b92b58b4347c77a65781ee2ac749d550f2a365dc1"}, - {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:a93deda571a1bf94ec1f6fcda2872dad3ae538700d94dc283c672a3b508ba3af"}, - {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e6f255980afef598a9e64a24efce87b625e3e3c80a45162d111a461a9f92955"}, - {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e838cad2176ebd5d4a8bb03955138d6589ce9e2ce5d51c3ada34396dbd2dba8"}, - {file = "grpcio-1.67.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:a6703916c43b1d468d0756c8077b12017a9fcb6a1ef13faf49e67d20d7ebda62"}, - {file = "grpcio-1.67.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:917e8d8994eed1d86b907ba2a61b9f0aef27a2155bca6cbb322430fc7135b7bb"}, - {file = "grpcio-1.67.1-cp313-cp313-win32.whl", hash = "sha256:e279330bef1744040db8fc432becc8a727b84f456ab62b744d3fdb83f327e121"}, - {file = "grpcio-1.67.1-cp313-cp313-win_amd64.whl", hash = "sha256:fa0c739ad8b1996bd24823950e3cb5152ae91fca1c09cc791190bf1627ffefba"}, - {file = "grpcio-1.67.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:178f5db771c4f9a9facb2ab37a434c46cb9be1a75e820f187ee3d1e7805c4f65"}, - {file = "grpcio-1.67.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0f3e49c738396e93b7ba9016e153eb09e0778e776df6090c1b8c91877cc1c426"}, - {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:24e8a26dbfc5274d7474c27759b54486b8de23c709d76695237515bc8b5baeab"}, - {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b6c16489326d79ead41689c4b84bc40d522c9a7617219f4ad94bc7f448c5085"}, - {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e6a4dcf5af7bbc36fd9f81c9f372e8ae580870a9e4b6eafe948cd334b81cf3"}, - {file = "grpcio-1.67.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:95b5f2b857856ed78d72da93cd7d09b6db8ef30102e5e7fe0961fe4d9f7d48e8"}, - {file = 
"grpcio-1.67.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b49359977c6ec9f5d0573ea4e0071ad278ef905aa74e420acc73fd28ce39e9ce"}, - {file = "grpcio-1.67.1-cp38-cp38-win32.whl", hash = "sha256:f5b76ff64aaac53fede0cc93abf57894ab2a7362986ba22243d06218b93efe46"}, - {file = "grpcio-1.67.1-cp38-cp38-win_amd64.whl", hash = "sha256:804c6457c3cd3ec04fe6006c739579b8d35c86ae3298ffca8de57b493524b771"}, - {file = "grpcio-1.67.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:a25bdea92b13ff4d7790962190bf6bf5c4639876e01c0f3dda70fc2769616335"}, - {file = "grpcio-1.67.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc491ae35a13535fd9196acb5afe1af37c8237df2e54427be3eecda3653127e"}, - {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:85f862069b86a305497e74d0dc43c02de3d1d184fc2c180993aa8aa86fbd19b8"}, - {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec74ef02010186185de82cc594058a3ccd8d86821842bbac9873fd4a2cf8be8d"}, - {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01f616a964e540638af5130469451cf580ba8c7329f45ca998ab66e0c7dcdb04"}, - {file = "grpcio-1.67.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:299b3d8c4f790c6bcca485f9963b4846dd92cf6f1b65d3697145d005c80f9fe8"}, - {file = "grpcio-1.67.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:60336bff760fbb47d7e86165408126f1dded184448e9a4c892189eb7c9d3f90f"}, - {file = "grpcio-1.67.1-cp39-cp39-win32.whl", hash = "sha256:5ed601c4c6008429e3d247ddb367fe8c7259c355757448d7c1ef7bd4a6739e8e"}, - {file = "grpcio-1.67.1-cp39-cp39-win_amd64.whl", hash = "sha256:5db70d32d6703b89912af16d6d45d78406374a8b8ef0d28140351dd0ec610e98"}, - {file = "grpcio-1.67.1.tar.gz", hash = "sha256:3dc2ed4cabea4dc14d5e708c2b426205956077cc5de419b4d4079315017e9732"}, + {file = "grpcio-1.68.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:d35740e3f45f60f3c37b1e6f2f4702c23867b9ce21c6410254c9c682237da68d"}, + {file = "grpcio-1.68.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:d99abcd61760ebb34bdff37e5a3ba333c5cc09feda8c1ad42547bea0416ada78"}, + {file = "grpcio-1.68.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:f8261fa2a5f679abeb2a0a93ad056d765cdca1c47745eda3f2d87f874ff4b8c9"}, + {file = "grpcio-1.68.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0feb02205a27caca128627bd1df4ee7212db051019a9afa76f4bb6a1a80ca95e"}, + {file = "grpcio-1.68.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:919d7f18f63bcad3a0f81146188e90274fde800a94e35d42ffe9eadf6a9a6330"}, + {file = "grpcio-1.68.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:963cc8d7d79b12c56008aabd8b457f400952dbea8997dd185f155e2f228db079"}, + {file = "grpcio-1.68.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ccf2ebd2de2d6661e2520dae293298a3803a98ebfc099275f113ce1f6c2a80f1"}, + {file = "grpcio-1.68.1-cp310-cp310-win32.whl", hash = "sha256:2cc1fd04af8399971bcd4f43bd98c22d01029ea2e56e69c34daf2bf8470e47f5"}, + {file = "grpcio-1.68.1-cp310-cp310-win_amd64.whl", hash = "sha256:ee2e743e51cb964b4975de572aa8fb95b633f496f9fcb5e257893df3be854746"}, + {file = "grpcio-1.68.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:55857c71641064f01ff0541a1776bfe04a59db5558e82897d35a7793e525774c"}, + {file = "grpcio-1.68.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:4b177f5547f1b995826ef529d2eef89cca2f830dd8b2c99ffd5fde4da734ba73"}, + {file = "grpcio-1.68.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = 
"sha256:3522c77d7e6606d6665ec8d50e867f13f946a4e00c7df46768f1c85089eae515"}, + {file = "grpcio-1.68.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9d1fae6bbf0816415b81db1e82fb3bf56f7857273c84dcbe68cbe046e58e1ccd"}, + {file = "grpcio-1.68.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:298ee7f80e26f9483f0b6f94cc0a046caf54400a11b644713bb5b3d8eb387600"}, + {file = "grpcio-1.68.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:cbb5780e2e740b6b4f2d208e90453591036ff80c02cc605fea1af8e6fc6b1bbe"}, + {file = "grpcio-1.68.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:ddda1aa22495d8acd9dfbafff2866438d12faec4d024ebc2e656784d96328ad0"}, + {file = "grpcio-1.68.1-cp311-cp311-win32.whl", hash = "sha256:b33bd114fa5a83f03ec6b7b262ef9f5cac549d4126f1dc702078767b10c46ed9"}, + {file = "grpcio-1.68.1-cp311-cp311-win_amd64.whl", hash = "sha256:7f20ebec257af55694d8f993e162ddf0d36bd82d4e57f74b31c67b3c6d63d8b2"}, + {file = "grpcio-1.68.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:8829924fffb25386995a31998ccbbeaa7367223e647e0122043dfc485a87c666"}, + {file = "grpcio-1.68.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:3aed6544e4d523cd6b3119b0916cef3d15ef2da51e088211e4d1eb91a6c7f4f1"}, + {file = "grpcio-1.68.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:4efac5481c696d5cb124ff1c119a78bddbfdd13fc499e3bc0ca81e95fc573684"}, + {file = "grpcio-1.68.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ab2d912ca39c51f46baf2a0d92aa265aa96b2443266fc50d234fa88bf877d8e"}, + {file = "grpcio-1.68.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c87ce2a97434dffe7327a4071839ab8e8bffd0054cc74cbe971fba98aedd60"}, + {file = "grpcio-1.68.1-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:e4842e4872ae4ae0f5497bf60a0498fa778c192cc7a9e87877abd2814aca9475"}, + {file = "grpcio-1.68.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:255b1635b0ed81e9f91da4fcc8d43b7ea5520090b9a9ad9340d147066d1d3613"}, + {file = "grpcio-1.68.1-cp312-cp312-win32.whl", hash = "sha256:7dfc914cc31c906297b30463dde0b9be48e36939575eaf2a0a22a8096e69afe5"}, + {file = "grpcio-1.68.1-cp312-cp312-win_amd64.whl", hash = "sha256:a0c8ddabef9c8f41617f213e527254c41e8b96ea9d387c632af878d05db9229c"}, + {file = "grpcio-1.68.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:a47faedc9ea2e7a3b6569795c040aae5895a19dde0c728a48d3c5d7995fda385"}, + {file = "grpcio-1.68.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:390eee4225a661c5cd133c09f5da1ee3c84498dc265fd292a6912b65c421c78c"}, + {file = "grpcio-1.68.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:66a24f3d45c33550703f0abb8b656515b0ab777970fa275693a2f6dc8e35f1c1"}, + {file = "grpcio-1.68.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c08079b4934b0bf0a8847f42c197b1d12cba6495a3d43febd7e99ecd1cdc8d54"}, + {file = "grpcio-1.68.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8720c25cd9ac25dd04ee02b69256d0ce35bf8a0f29e20577427355272230965a"}, + {file = "grpcio-1.68.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:04cfd68bf4f38f5bb959ee2361a7546916bd9a50f78617a346b3aeb2b42e2161"}, + {file = "grpcio-1.68.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c28848761a6520c5c6071d2904a18d339a796ebe6b800adc8b3f474c5ce3c3ad"}, + {file = "grpcio-1.68.1-cp313-cp313-win32.whl", hash = "sha256:77d65165fc35cff6e954e7fd4229e05ec76102d4406d4576528d3a3635fc6172"}, + {file = "grpcio-1.68.1-cp313-cp313-win_amd64.whl", hash 
= "sha256:a8040f85dcb9830d8bbb033ae66d272614cec6faceee88d37a88a9bd1a7a704e"}, + {file = "grpcio-1.68.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:eeb38ff04ab6e5756a2aef6ad8d94e89bb4a51ef96e20f45c44ba190fa0bcaad"}, + {file = "grpcio-1.68.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:8a3869a6661ec8f81d93f4597da50336718bde9eb13267a699ac7e0a1d6d0bea"}, + {file = "grpcio-1.68.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:2c4cec6177bf325eb6faa6bd834d2ff6aa8bb3b29012cceb4937b86f8b74323c"}, + {file = "grpcio-1.68.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:12941d533f3cd45d46f202e3667be8ebf6bcb3573629c7ec12c3e211d99cfccf"}, + {file = "grpcio-1.68.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80af6f1e69c5e68a2be529990684abdd31ed6622e988bf18850075c81bb1ad6e"}, + {file = "grpcio-1.68.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e8dbe3e00771bfe3d04feed8210fc6617006d06d9a2679b74605b9fed3e8362c"}, + {file = "grpcio-1.68.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:83bbf5807dc3ee94ce1de2dfe8a356e1d74101e4b9d7aa8c720cc4818a34aded"}, + {file = "grpcio-1.68.1-cp38-cp38-win32.whl", hash = "sha256:8cb620037a2fd9eeee97b4531880e439ebfcd6d7d78f2e7dcc3726428ab5ef63"}, + {file = "grpcio-1.68.1-cp38-cp38-win_amd64.whl", hash = "sha256:52fbf85aa71263380d330f4fce9f013c0798242e31ede05fcee7fbe40ccfc20d"}, + {file = "grpcio-1.68.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:cb400138e73969eb5e0535d1d06cae6a6f7a15f2cc74add320e2130b8179211a"}, + {file = "grpcio-1.68.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:a1b988b40f2fd9de5c820f3a701a43339d8dcf2cb2f1ca137e2c02671cc83ac1"}, + {file = "grpcio-1.68.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:96f473cdacfdd506008a5d7579c9f6a7ff245a9ade92c3c0265eb76cc591914f"}, + {file = "grpcio-1.68.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:37ea3be171f3cf3e7b7e412a98b77685eba9d4fd67421f4a34686a63a65d99f9"}, + {file = "grpcio-1.68.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ceb56c4285754e33bb3c2fa777d055e96e6932351a3082ce3559be47f8024f0"}, + {file = "grpcio-1.68.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:dffd29a2961f3263a16d73945b57cd44a8fd0b235740cb14056f0612329b345e"}, + {file = "grpcio-1.68.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:025f790c056815b3bf53da850dd70ebb849fd755a4b1ac822cb65cd631e37d43"}, + {file = "grpcio-1.68.1-cp39-cp39-win32.whl", hash = "sha256:1098f03dedc3b9810810568060dea4ac0822b4062f537b0f53aa015269be0a76"}, + {file = "grpcio-1.68.1-cp39-cp39-win_amd64.whl", hash = "sha256:334ab917792904245a028f10e803fcd5b6f36a7b2173a820c0b5b076555825e1"}, + {file = "grpcio-1.68.1.tar.gz", hash = "sha256:44a8502dd5de653ae6a73e2de50a401d84184f0331d0ac3daeb044e66d5c5054"}, ] [package.extras] -protobuf = ["grpcio-tools (>=1.67.1)"] +protobuf = ["grpcio-tools (>=1.68.1)"] [[package]] name = "grpcio-tools" @@ -1955,13 +1900,13 @@ files = [ [[package]] name = "httpcore" -version = "1.0.6" +version = "1.0.7" description = "A minimal low-level HTTP client." 
optional = false python-versions = ">=3.8" files = [ - {file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"}, - {file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"}, + {file = "httpcore-1.0.7-py3-none-any.whl", hash = "sha256:a3fff8f43dc260d5bd363d9f9cf1830fa3a458b332856f34282de498ed420edd"}, + {file = "httpcore-1.0.7.tar.gz", hash = "sha256:8551cb62a169ec7162ac7be8d4817d561f60e08eaa485234898414bb5a8a0b4c"}, ] [package.dependencies] @@ -2002,13 +1947,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "huggingface-hub" -version = "0.26.2" +version = "0.26.3" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.26.2-py3-none-any.whl", hash = "sha256:98c2a5a8e786c7b2cb6fdeb2740893cba4d53e312572ed3d8afafda65b128c46"}, - {file = "huggingface_hub-0.26.2.tar.gz", hash = "sha256:b100d853465d965733964d123939ba287da60a547087783ddff8a323f340332b"}, + {file = "huggingface_hub-0.26.3-py3-none-any.whl", hash = "sha256:e66aa99e569c2d5419240a9e553ad07245a5b1300350bfbc5a4945cf7432991b"}, + {file = "huggingface_hub-0.26.3.tar.gz", hash = "sha256:90e1fe62ffc26757a073aaad618422b899ccf9447c2bba8c902a90bef5b42e1d"}, ] [package.dependencies] @@ -2064,13 +2009,13 @@ scipy = ">=1.4.0" [[package]] name = "identify" -version = "2.6.2" +version = "2.6.3" description = "File identification library for Python" optional = false python-versions = ">=3.9" files = [ - {file = "identify-2.6.2-py2.py3-none-any.whl", hash = "sha256:c097384259f49e372f4ea00a19719d95ae27dd5ff0fd77ad630aa891306b82f3"}, - {file = "identify-2.6.2.tar.gz", hash = "sha256:fab5c716c24d7a789775228823797296a2994b075fb6080ac83a102772a98cbd"}, + {file = "identify-2.6.3-py2.py3-none-any.whl", hash = "sha256:9edba65473324c2ea9684b1f944fe3191db3345e50b6d04571d10ed164f8d7bd"}, + {file = "identify-2.6.3.tar.gz", hash = "sha256:62f5dae9b5fef52c84cc188514e9ea4f3f636b1d8799ab5ebc475471f9e47a02"}, ] [package.extras] @@ -2160,84 +2105,86 @@ i18n = ["Babel (>=2.7)"] [[package]] name = "jiter" -version = "0.7.0" +version = "0.8.0" description = "Fast iterable JSON parser." 
optional = false python-versions = ">=3.8" files = [ - {file = "jiter-0.7.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:e14027f61101b3f5e173095d9ecf95c1cac03ffe45a849279bde1d97e559e314"}, - {file = "jiter-0.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:979ec4711c2e37ac949561858bd42028884c9799516a923e1ff0b501ef341a4a"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:662d5d3cca58ad6af7a3c6226b641c8655de5beebcb686bfde0df0f21421aafa"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1d89008fb47043a469f97ad90840b97ba54e7c3d62dc7cbb6cbf938bd0caf71d"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a8b16c35c846a323ce9067170d5ab8c31ea3dbcab59c4f7608bbbf20c2c3b43f"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c9e82daaa1b0a68704f9029b81e664a5a9de3e466c2cbaabcda5875f961702e7"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:43a87a9f586636e1f0dd3651a91f79b491ea0d9fd7cbbf4f5c463eebdc48bda7"}, - {file = "jiter-0.7.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2ec05b1615f96cc3e4901678bc863958611584072967d9962f9e571d60711d52"}, - {file = "jiter-0.7.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:a5cb97e35370bde7aa0d232a7f910f5a0fbbc96bc0a7dbaa044fd5cd6bcd7ec3"}, - {file = "jiter-0.7.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:cb316dacaf48c8c187cea75d0d7f835f299137e6fdd13f691dff8f92914015c7"}, - {file = "jiter-0.7.0-cp310-none-win32.whl", hash = "sha256:243f38eb4072763c54de95b14ad283610e0cd3bf26393870db04e520f60eebb3"}, - {file = "jiter-0.7.0-cp310-none-win_amd64.whl", hash = "sha256:2221d5603c139f6764c54e37e7c6960c469cbcd76928fb10d15023ba5903f94b"}, - {file = "jiter-0.7.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:91cec0ad755bd786c9f769ce8d843af955df6a8e56b17658771b2d5cb34a3ff8"}, - {file = "jiter-0.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:feba70a28a27d962e353e978dbb6afd798e711c04cb0b4c5e77e9d3779033a1a"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a9d866ec066c3616cacb8535dbda38bb1d470b17b25f0317c4540182bc886ce2"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8e7a7a00b6f9f18289dd563596f97ecaba6c777501a8ba04bf98e03087bcbc60"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9aaf564094c7db8687f2660605e099f3d3e6ea5e7135498486674fcb78e29165"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a4d27e09825c1b3c7a667adb500ce8b840e8fc9f630da8454b44cdd4fb0081bb"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca7c287da9c1d56dda88da1d08855a787dbb09a7e2bd13c66a2e288700bd7c7"}, - {file = "jiter-0.7.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:db19a6d160f093cbc8cd5ea2abad420b686f6c0e5fb4f7b41941ebc6a4f83cda"}, - {file = "jiter-0.7.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e46a63c7f877cf7441ffc821c28287cfb9f533ae6ed707bde15e7d4dfafa7ae"}, - {file = "jiter-0.7.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:7ba426fa7ff21cb119fa544b75dd3fbee6a70e55a5829709c0338d07ccd30e6d"}, - {file = "jiter-0.7.0-cp311-none-win32.whl", hash = 
"sha256:c07f55a64912b0c7982377831210836d2ea92b7bd343fca67a32212dd72e38e0"}, - {file = "jiter-0.7.0-cp311-none-win_amd64.whl", hash = "sha256:ed27b2c43e1b5f6c7fedc5c11d4d8bfa627de42d1143d87e39e2e83ddefd861a"}, - {file = "jiter-0.7.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ac7930bcaaeb1e229e35c91c04ed2e9f39025b86ee9fc3141706bbf6fff4aeeb"}, - {file = "jiter-0.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:571feae3e7c901a8eedde9fd2865b0dfc1432fb15cab8c675a8444f7d11b7c5d"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8af4df8a262fa2778b68c2a03b6e9d1cb4d43d02bea6976d46be77a3a331af1"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd028d4165097a611eb0c7494d8c1f2aebd46f73ca3200f02a175a9c9a6f22f5"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c6b487247c7836810091e9455efe56a52ec51bfa3a222237e1587d04d3e04527"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e6d28a92f28814e1a9f2824dc11f4e17e1df1f44dc4fdeb94c5450d34bcb2602"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90443994bbafe134f0b34201dad3ebe1c769f0599004084e046fb249ad912425"}, - {file = "jiter-0.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f9abf464f9faac652542ce8360cea8e68fba2b78350e8a170248f9bcc228702a"}, - {file = "jiter-0.7.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db7a8d99fc5f842f7d2852f06ccaed066532292c41723e5dff670c339b649f88"}, - {file = "jiter-0.7.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:15cf691ebd8693b70c94627d6b748f01e6d697d9a6e9f2bc310934fcfb7cf25e"}, - {file = "jiter-0.7.0-cp312-none-win32.whl", hash = "sha256:9dcd54fa422fb66ca398bec296fed5f58e756aa0589496011cfea2abb5be38a5"}, - {file = "jiter-0.7.0-cp312-none-win_amd64.whl", hash = "sha256:cc989951f73f9375b8eacd571baaa057f3d7d11b7ce6f67b9d54642e7475bfad"}, - {file = "jiter-0.7.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:24cecd18df540963cd27c08ca5ce1d0179f229ff78066d9eecbe5add29361340"}, - {file = "jiter-0.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d41b46236b90b043cca73785674c23d2a67d16f226394079d0953f94e765ed76"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b160db0987171365c153e406a45dcab0ee613ae3508a77bfff42515cb4ce4d6e"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d1c8d91e0f0bd78602eaa081332e8ee4f512c000716f5bc54e9a037306d693a7"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:997706c683195eeff192d2e5285ce64d2a610414f37da3a3f2625dcf8517cf90"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7ea52a8a0ff0229ab2920284079becd2bae0688d432fca94857ece83bb49c541"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d77449d2738cf74752bb35d75ee431af457e741124d1db5e112890023572c7c"}, - {file = "jiter-0.7.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8203519907a1d81d6cb00902c98e27c2d0bf25ce0323c50ca594d30f5f1fbcf"}, - {file = "jiter-0.7.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41d15ccc53931c822dd7f1aebf09faa3cda2d7b48a76ef304c7dbc19d1302e51"}, - {file = "jiter-0.7.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:febf3179b2fabf71fbd2fd52acb8594163bb173348b388649567a548f356dbf6"}, - {file = "jiter-0.7.0-cp313-none-win32.whl", hash = "sha256:4a8e2d866e7eda19f012444e01b55079d8e1c4c30346aaac4b97e80c54e2d6d3"}, - {file = "jiter-0.7.0-cp313-none-win_amd64.whl", hash = "sha256:7417c2b928062c496f381fb0cb50412eee5ad1d8b53dbc0e011ce45bb2de522c"}, - {file = "jiter-0.7.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:9c62c737b5368e51e74960a08fe1adc807bd270227291daede78db24d5fbf556"}, - {file = "jiter-0.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:e4640722b1bef0f6e342fe4606aafaae0eb4f4be5c84355bb6867f34400f6688"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f367488c3b9453eab285424c61098faa1cab37bb49425e69c8dca34f2dfe7d69"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0cf5d42beb3514236459454e3287db53d9c4d56c4ebaa3e9d0efe81b19495129"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cc5190ea1113ee6f7252fa8a5fe5a6515422e378356c950a03bbde5cafbdbaab"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:63ee47a149d698796a87abe445fc8dee21ed880f09469700c76c8d84e0d11efd"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48592c26ea72d3e71aa4bea0a93454df907d80638c3046bb0705507b6704c0d7"}, - {file = "jiter-0.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:79fef541199bd91cfe8a74529ecccb8eaf1aca38ad899ea582ebbd4854af1e51"}, - {file = "jiter-0.7.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d1ef6bb66041f2514739240568136c81b9dcc64fd14a43691c17ea793b6535c0"}, - {file = "jiter-0.7.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aca4d950863b1c238e315bf159466e064c98743eef3bd0ff9617e48ff63a4715"}, - {file = "jiter-0.7.0-cp38-none-win32.whl", hash = "sha256:897745f230350dcedb8d1ebe53e33568d48ea122c25e6784402b6e4e88169be7"}, - {file = "jiter-0.7.0-cp38-none-win_amd64.whl", hash = "sha256:b928c76a422ef3d0c85c5e98c498ce3421b313c5246199541e125b52953e1bc0"}, - {file = "jiter-0.7.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:c9b669ff6f8ba08270dee9ccf858d3b0203b42314a428a1676762f2d390fbb64"}, - {file = "jiter-0.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b5be919bacd73ca93801c3042bce6e95cb9c555a45ca83617b9b6c89df03b9c2"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a282e1e8a396dabcea82d64f9d05acf7efcf81ecdd925b967020dcb0e671c103"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:17ecb1a578a56e97a043c72b463776b5ea30343125308f667fb8fce4b3796735"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7b6045fa0527129218cdcd8a8b839f678219686055f31ebab35f87d354d9c36e"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:189cc4262a92e33c19d4fd24018f5890e4e6da5b2581f0059938877943f8298c"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c138414839effbf30d185e30475c6dc8a16411a1e3681e5fd4605ab1233ac67a"}, - {file = "jiter-0.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2791604acef33da6b72d5ecf885a32384bcaf9aa1e4be32737f3b8b9588eef6a"}, - {file = "jiter-0.7.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:ae60ec89037a78d60bbf3d8b127f1567769c8fa24886e0abed3f622791dea478"}, - {file = "jiter-0.7.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:836f03dea312967635233d826f783309b98cfd9ccc76ac776e224cfcef577862"}, - {file = "jiter-0.7.0-cp39-none-win32.whl", hash = "sha256:ebc30ae2ce4bc4986e1764c404b4ea1924f926abf02ce92516485098f8545374"}, - {file = "jiter-0.7.0-cp39-none-win_amd64.whl", hash = "sha256:abf596f951370c648f37aa9899deab296c42a3829736e598b0dd10b08f77a44d"}, - {file = "jiter-0.7.0.tar.gz", hash = "sha256:c061d9738535497b5509f8970584f20de1e900806b239a39a9994fc191dad630"}, + {file = "jiter-0.8.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:dee4eeb293ffcd2c3b31ebab684dbf7f7b71fe198f8eddcdf3a042cc6e10205a"}, + {file = "jiter-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:aad1e6e9b01cf0304dcee14db03e92e0073287a6297caf5caf2e9dbfea16a924"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:504099fb7acdbe763e10690d560a25d4aee03d918d6a063f3a761d8a09fb833f"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2373487caad7fe39581f588ab5c9262fc1ade078d448626fec93f4ffba528858"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c341ecc3f9bccde952898b0c97c24f75b84b56a7e2f8bbc7c8e38cab0875a027"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0e48e7a336529b9419d299b70c358d4ebf99b8f4b847ed3f1000ec9f320e8c0c"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f5ee157a8afd2943be690db679f82fafb8d347a8342e8b9c34863de30c538d55"}, + {file = "jiter-0.8.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d7dceae3549b80087f913aad4acc2a7c1e0ab7cb983effd78bdc9c41cabdcf18"}, + {file = "jiter-0.8.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e29e9ecce53d396772590438214cac4ab89776f5e60bd30601f1050b34464019"}, + {file = "jiter-0.8.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fa1782f22d5f92c620153133f35a9a395d3f3823374bceddd3e7032e2fdfa0b1"}, + {file = "jiter-0.8.0-cp310-none-win32.whl", hash = "sha256:f754ef13b4e4f67a3bf59fe974ef4342523801c48bf422f720bd37a02a360584"}, + {file = "jiter-0.8.0-cp310-none-win_amd64.whl", hash = "sha256:796f750b65f5d605f5e7acaccc6b051675e60c41d7ac3eab40dbd7b5b81a290f"}, + {file = "jiter-0.8.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f6f4e645efd96b4690b9b6091dbd4e0fa2885ba5c57a0305c1916b75b4f30ff6"}, + {file = "jiter-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f61cf6d93c1ade9b8245c9f14b7900feadb0b7899dbe4aa8de268b705647df81"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0396bc5cb1309c6dab085e70bb3913cdd92218315e47b44afe9eace68ee8adaa"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62d0e42ec5dc772bd8554a304358220be5d97d721c4648b23f3a9c01ccc2cb26"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec4b711989860705733fc59fb8c41b2def97041cea656b37cf6c8ea8dee1c3f4"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:859cc35bf304ab066d88f10a44a3251a9cd057fb11ec23e00be22206db878f4f"}, + {file = "jiter-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5000195921aa293b39b9b5bc959d7fa658e7f18f938c0e52732da8e3cc70a278"}, + 
{file = "jiter-0.8.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36050284c0abde57aba34964d3920f3d6228211b65df7187059bb7c7f143759a"}, + {file = "jiter-0.8.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:a88f608e050cfe45c48d771e86ecdbf5258314c883c986d4217cc79e1fb5f689"}, + {file = "jiter-0.8.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:646cf4237665b2e13b4159d8f26d53f59bc9f2e6e135e3a508a2e5dd26d978c6"}, + {file = "jiter-0.8.0-cp311-none-win32.whl", hash = "sha256:21fe5b8345db1b3023052b2ade9bb4d369417827242892051244af8fae8ba231"}, + {file = "jiter-0.8.0-cp311-none-win_amd64.whl", hash = "sha256:30c2161c5493acf6b6c3c909973fb64ae863747def01cc7574f3954e0a15042c"}, + {file = "jiter-0.8.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:d91a52d8f49ada2672a4b808a0c5c25d28f320a2c9ca690e30ebd561eb5a1002"}, + {file = "jiter-0.8.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c38cf25cf7862f61410b7a49684d34eb3b5bcbd7ddaf4773eea40e0bd43de706"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c6189beb5c4b3117624be6b2e84545cff7611f5855d02de2d06ff68e316182be"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e13fa849c0e30643554add089983caa82f027d69fad8f50acadcb21c462244ab"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d7765ca159d0a58e8e0f8ca972cd6d26a33bc97b4480d0d2309856763807cd28"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1b0befe7c6e9fc867d5bed21bab0131dfe27d1fa5cd52ba2bced67da33730b7d"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7d6363d4c6f1052b1d8b494eb9a72667c3ef5f80ebacfe18712728e85327000"}, + {file = "jiter-0.8.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a873e57009863eeac3e3969e4653f07031d6270d037d6224415074ac17e5505c"}, + {file = "jiter-0.8.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:2582912473c0d9940791479fe1bf2976a34f212eb8e0a82ee9e645ac275c5d16"}, + {file = "jiter-0.8.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:646163201af42f55393ee6e8f6136b8df488253a6533f4230a64242ecbfe6048"}, + {file = "jiter-0.8.0-cp312-none-win32.whl", hash = "sha256:96e75c9abfbf7387cba89a324d2356d86d8897ac58c956017d062ad510832dae"}, + {file = "jiter-0.8.0-cp312-none-win_amd64.whl", hash = "sha256:ed6074552b4a32e047b52dad5ab497223721efbd0e9efe68c67749f094a092f7"}, + {file = "jiter-0.8.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:dd5e351cb9b3e676ec3360a85ea96def515ad2b83c8ae3a251ce84985a2c9a6f"}, + {file = "jiter-0.8.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ba9f12b0f801ecd5ed0cec29041dc425d1050922b434314c592fc30d51022467"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7ba461c3681728d556392e8ae56fb44a550155a24905f01982317b367c21dd4"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3a15ed47ab09576db560dbc5c2c5a64477535beb056cd7d997d5dd0f2798770e"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cef55042816d0737142b0ec056c0356a5f681fb8d6aa8499b158e87098f4c6f8"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:549f170215adeb5e866f10617c3d019d8eb4e6d4e3c6b724b3b8c056514a3487"}, + {file = 
"jiter-0.8.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f867edeb279d22020877640d2ea728de5817378c60a51be8af731a8a8f525306"}, + {file = "jiter-0.8.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:aef8845f463093799db4464cee2aa59d61aa8edcb3762aaa4aacbec3f478c929"}, + {file = "jiter-0.8.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:d0d6e22e4062c3d3c1bf3594baa2f67fc9dcdda8275abad99e468e0c6540bc54"}, + {file = "jiter-0.8.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:079e62e64696241ac3f408e337aaac09137ed760ccf2b72b1094b48745c13641"}, + {file = "jiter-0.8.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74d2b56ed3da5760544df53b5f5c39782e68efb64dc3aa0bba4cc08815e6fae8"}, + {file = "jiter-0.8.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:798dafe108cba58a7bb0a50d4d5971f98bb7f3c974e1373e750de6eb21c1a329"}, + {file = "jiter-0.8.0-cp313-none-win32.whl", hash = "sha256:ca6d3064dfc743eb0d3d7539d89d4ba886957c717567adc72744341c1e3573c9"}, + {file = "jiter-0.8.0-cp313-none-win_amd64.whl", hash = "sha256:38caedda64fe1f04b06d7011fc15e86b3b837ed5088657bf778656551e3cd8f9"}, + {file = "jiter-0.8.0-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:bb5c8a0a8d081c338db22e5b8d53a89a121790569cbb85f7d3cfb1fe0fbe9836"}, + {file = "jiter-0.8.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:202dbe8970bfb166fab950eaab8f829c505730a0b33cc5e1cfb0a1c9dd56b2f9"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9046812e5671fdcfb9ae02881fff1f6a14d484b7e8b3316179a372cdfa1e8026"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e6ac56425023e52d65150918ae25480d0a1ce2a6bf5ea2097f66a2cc50f6d692"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7dfcf97210c6eab9d2a1c6af15dd39e1d5154b96a7145d0a97fa1df865b7b834"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d4e3c8444d418686f78c9a547b9b90031faf72a0a1a46bfec7fb31edbd889c0d"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6507011a299b7f578559084256405a8428875540d8d13530e00b688e41b09493"}, + {file = "jiter-0.8.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0aae4738eafdd34f0f25c2d3668ce9e8fa0d7cb75a2efae543c9a69aebc37323"}, + {file = "jiter-0.8.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:7f5d782e790396b13f2a7b36bdcaa3736a33293bdda80a4bf1a3ce0cd5ef9f15"}, + {file = "jiter-0.8.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cc7f993bc2c4e03015445adbb16790c303282fce2e8d9dc3a3905b1d40e50564"}, + {file = "jiter-0.8.0-cp38-none-win32.whl", hash = "sha256:d4a8a6eda018a991fa58ef707dd51524055d11f5acb2f516d70b1be1d15ab39c"}, + {file = "jiter-0.8.0-cp38-none-win_amd64.whl", hash = "sha256:4cca948a3eda8ea24ed98acb0ee19dc755b6ad2e570ec85e1527d5167f91ff67"}, + {file = "jiter-0.8.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:ef89663678d8257063ce7c00d94638e05bd72f662c5e1eb0e07a172e6c1a9a9f"}, + {file = "jiter-0.8.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c402ddcba90b4cc71db3216e8330f4db36e0da2c78cf1d8a9c3ed8f272602a94"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a6dfe795b7a173a9f8ba7421cdd92193d60c1c973bbc50dc3758a9ad0fa5eb6"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:8ec29a31b9abd6be39453a2c45da067138a3005d65d2c0507c530e0f1fdcd9a4"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2a488f8c54bddc3ddefaf3bfd6de4a52c97fc265d77bc2dcc6ee540c17e8c342"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aeb5561adf4d26ca0d01b5811b4d7b56a8986699a473d700757b4758ef787883"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4ab961858d7ad13132328517d29f121ae1b2d94502191d6bcf96bddcc8bb5d1c"}, + {file = "jiter-0.8.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a207e718d114d23acf0850a2174d290f42763d955030d9924ffa4227dbd0018f"}, + {file = "jiter-0.8.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:733bc9dc8ff718a0ae4695239e9268eb93e88b73b367dfac3ec227d8ce2f1e77"}, + {file = "jiter-0.8.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d1ec27299e22d05e13a06e460bf7f75f26f9aaa0e0fb7d060f40e88df1d81faa"}, + {file = "jiter-0.8.0-cp39-none-win32.whl", hash = "sha256:e8dbfcb46553e6661d3fc1f33831598fcddf73d0f67834bce9fc3e9ebfe5c439"}, + {file = "jiter-0.8.0-cp39-none-win_amd64.whl", hash = "sha256:af2ce2487b3a93747e2cb5150081d4ae1e5874fce5924fc1a12e9e768e489ad8"}, + {file = "jiter-0.8.0.tar.gz", hash = "sha256:86fee98b569d4cc511ff2e3ec131354fafebd9348a487549c31ad371ae730310"}, ] [[package]] @@ -2433,13 +2380,13 @@ files = [ [[package]] name = "litellm" -version = "1.52.3" +version = "1.53.3" description = "Library to easily interface with LLM API providers" optional = false python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm-1.52.3-py3-none-any.whl", hash = "sha256:fc8d5d53ba184cd570ae50d9acefa53c521225b62244adedea129794e98828b6"}, - {file = "litellm-1.52.3.tar.gz", hash = "sha256:4718235cbd6dea8db99b08e884a07f7ac7fad4a4b12597e20d8ff622295e1e05"}, + {file = "litellm-1.53.3-py3-none-any.whl", hash = "sha256:b4bda8efa1d12fe98086c8e84342b92fd499a808017e4d642c43aa784f451b13"}, + {file = "litellm-1.53.3.tar.gz", hash = "sha256:42feb755c8887522ac913a9c04ef2d4242efd821c2ee2d3ad59097047e80f3ca"}, ] [package.dependencies] @@ -2663,13 +2610,13 @@ source = ["Cython (>=3.0.11)"] [[package]] name = "mako" -version = "1.3.6" +version = "1.3.7" description = "A super-fast templating language that borrows the best ideas from the existing templating languages." 
optional = false python-versions = ">=3.8" files = [ - {file = "Mako-1.3.6-py3-none-any.whl", hash = "sha256:a91198468092a2f1a0de86ca92690fb0cfc43ca90ee17e15d93662b4c04b241a"}, - {file = "mako-1.3.6.tar.gz", hash = "sha256:9ec3a1583713479fae654f83ed9fa8c9a4c16b7bb0daba0e6bbebff50c0d983d"}, + {file = "Mako-1.3.7-py3-none-any.whl", hash = "sha256:d18f990ad57f800ce8e76cbfb0b74afe471c293517e9f5003ace6dad5aa72c36"}, + {file = "mako-1.3.7.tar.gz", hash = "sha256:20405b1232e0759f0e7d87b01f6bb94fce0761747f1cb876ecf90bd512d0b639"}, ] [package.dependencies] @@ -2786,51 +2733,52 @@ tests = ["pytest", "simplejson"] [[package]] name = "matplotlib" -version = "3.9.2" +version = "3.9.3" description = "Python plotting package" optional = true python-versions = ">=3.9" files = [ - {file = "matplotlib-3.9.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:9d78bbc0cbc891ad55b4f39a48c22182e9bdaea7fc0e5dbd364f49f729ca1bbb"}, - {file = "matplotlib-3.9.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c375cc72229614632c87355366bdf2570c2dac01ac66b8ad048d2dabadf2d0d4"}, - {file = "matplotlib-3.9.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d94ff717eb2bd0b58fe66380bd8b14ac35f48a98e7c6765117fe67fb7684e64"}, - {file = "matplotlib-3.9.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ab68d50c06938ef28681073327795c5db99bb4666214d2d5f880ed11aeaded66"}, - {file = "matplotlib-3.9.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:65aacf95b62272d568044531e41de26285d54aec8cb859031f511f84bd8b495a"}, - {file = "matplotlib-3.9.2-cp310-cp310-win_amd64.whl", hash = "sha256:3fd595f34aa8a55b7fc8bf9ebea8aa665a84c82d275190a61118d33fbc82ccae"}, - {file = "matplotlib-3.9.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d8dd059447824eec055e829258ab092b56bb0579fc3164fa09c64f3acd478772"}, - {file = "matplotlib-3.9.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c797dac8bb9c7a3fd3382b16fe8f215b4cf0f22adccea36f1545a6d7be310b41"}, - {file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d719465db13267bcef19ea8954a971db03b9f48b4647e3860e4bc8e6ed86610f"}, - {file = "matplotlib-3.9.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8912ef7c2362f7193b5819d17dae8629b34a95c58603d781329712ada83f9447"}, - {file = "matplotlib-3.9.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:7741f26a58a240f43bee74965c4882b6c93df3e7eb3de160126d8c8f53a6ae6e"}, - {file = "matplotlib-3.9.2-cp311-cp311-win_amd64.whl", hash = "sha256:ae82a14dab96fbfad7965403c643cafe6515e386de723e498cf3eeb1e0b70cc7"}, - {file = "matplotlib-3.9.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ac43031375a65c3196bee99f6001e7fa5bdfb00ddf43379d3c0609bdca042df9"}, - {file = "matplotlib-3.9.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:be0fc24a5e4531ae4d8e858a1a548c1fe33b176bb13eff7f9d0d38ce5112a27d"}, - {file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf81de2926c2db243c9b2cbc3917619a0fc85796c6ba4e58f541df814bbf83c7"}, - {file = "matplotlib-3.9.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6ee45bc4245533111ced13f1f2cace1e7f89d1c793390392a80c139d6cf0e6c"}, - {file = "matplotlib-3.9.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:306c8dfc73239f0e72ac50e5a9cf19cc4e8e331dd0c54f5e69ca8758550f1e1e"}, - {file = "matplotlib-3.9.2-cp312-cp312-win_amd64.whl", hash = 
"sha256:5413401594cfaff0052f9d8b1aafc6d305b4bd7c4331dccd18f561ff7e1d3bd3"}, - {file = "matplotlib-3.9.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:18128cc08f0d3cfff10b76baa2f296fc28c4607368a8402de61bb3f2eb33c7d9"}, - {file = "matplotlib-3.9.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4876d7d40219e8ae8bb70f9263bcbe5714415acfdf781086601211335e24f8aa"}, - {file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6d9f07a80deab4bb0b82858a9e9ad53d1382fd122be8cde11080f4e7dfedb38b"}, - {file = "matplotlib-3.9.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7c0410f181a531ec4e93bbc27692f2c71a15c2da16766f5ba9761e7ae518413"}, - {file = "matplotlib-3.9.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:909645cce2dc28b735674ce0931a4ac94e12f5b13f6bb0b5a5e65e7cea2c192b"}, - {file = "matplotlib-3.9.2-cp313-cp313-win_amd64.whl", hash = "sha256:f32c7410c7f246838a77d6d1eff0c0f87f3cb0e7c4247aebea71a6d5a68cab49"}, - {file = "matplotlib-3.9.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:37e51dd1c2db16ede9cfd7b5cabdfc818b2c6397c83f8b10e0e797501c963a03"}, - {file = "matplotlib-3.9.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b82c5045cebcecd8496a4d694d43f9cc84aeeb49fe2133e036b207abe73f4d30"}, - {file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f053c40f94bc51bc03832a41b4f153d83f2062d88c72b5e79997072594e97e51"}, - {file = "matplotlib-3.9.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dbe196377a8248972f5cede786d4c5508ed5f5ca4a1e09b44bda889958b33f8c"}, - {file = "matplotlib-3.9.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5816b1e1fe8c192cbc013f8f3e3368ac56fbecf02fb41b8f8559303f24c5015e"}, - {file = "matplotlib-3.9.2-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:cef2a73d06601437be399908cf13aee74e86932a5ccc6ccdf173408ebc5f6bb2"}, - {file = "matplotlib-3.9.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:e0830e188029c14e891fadd99702fd90d317df294c3298aad682739c5533721a"}, - {file = "matplotlib-3.9.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:03ba9c1299c920964e8d3857ba27173b4dbb51ca4bab47ffc2c2ba0eb5e2cbc5"}, - {file = "matplotlib-3.9.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1cd93b91ab47a3616b4d3c42b52f8363b88ca021e340804c6ab2536344fad9ca"}, - {file = "matplotlib-3.9.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:6d1ce5ed2aefcdce11904fc5bbea7d9c21fff3d5f543841edf3dea84451a09ea"}, - {file = "matplotlib-3.9.2-cp39-cp39-win_amd64.whl", hash = "sha256:b2696efdc08648536efd4e1601b5fd491fd47f4db97a5fbfd175549a7365c1b2"}, - {file = "matplotlib-3.9.2-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d52a3b618cb1cbb769ce2ee1dcdb333c3ab6e823944e9a2d36e37253815f9556"}, - {file = "matplotlib-3.9.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:039082812cacd6c6bec8e17a9c1e6baca230d4116d522e81e1f63a74d01d2e21"}, - {file = "matplotlib-3.9.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6758baae2ed64f2331d4fd19be38b7b4eae3ecec210049a26b6a4f3ae1c85dcc"}, - {file = "matplotlib-3.9.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:050598c2b29e0b9832cde72bcf97627bf00262adbc4a54e2b856426bb2ef0697"}, - {file = "matplotlib-3.9.2.tar.gz", hash = "sha256:96ab43906269ca64a6366934106fa01534454a69e471b7bf3d79083981aaab92"}, + {file = "matplotlib-3.9.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = 
"sha256:41b016e3be4e740b66c79a031a0a6e145728dbc248142e751e8dab4f3188ca1d"}, + {file = "matplotlib-3.9.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e0143975fc2a6d7136c97e19c637321288371e8f09cff2564ecd73e865ea0b9"}, + {file = "matplotlib-3.9.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f459c8ee2c086455744723628264e43c884be0c7d7b45d84b8cd981310b4815"}, + {file = "matplotlib-3.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:687df7ceff57b8f070d02b4db66f75566370e7ae182a0782b6d3d21b0d6917dc"}, + {file = "matplotlib-3.9.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:edd14cf733fdc4f6e6fe3f705af97676a7e52859bf0044aa2c84e55be739241c"}, + {file = "matplotlib-3.9.3-cp310-cp310-win_amd64.whl", hash = "sha256:1c40c244221a1adbb1256692b1133c6fb89418df27bf759a31a333e7912a4010"}, + {file = "matplotlib-3.9.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:cf2a60daf6cecff6828bc608df00dbc794380e7234d2411c0ec612811f01969d"}, + {file = "matplotlib-3.9.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:213d6dc25ce686516208d8a3e91120c6a4fdae4a3e06b8505ced5b716b50cc04"}, + {file = "matplotlib-3.9.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c52f48eb75fcc119a4fdb68ba83eb5f71656999420375df7c94cc68e0e14686e"}, + {file = "matplotlib-3.9.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3c93796b44fa111049b88a24105e947f03c01966b5c0cc782e2ee3887b790a3"}, + {file = "matplotlib-3.9.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cd1077b9a09b16d8c3c7075a8add5ffbfe6a69156a57e290c800ed4d435bef1d"}, + {file = "matplotlib-3.9.3-cp311-cp311-win_amd64.whl", hash = "sha256:c96eeeb8c68b662c7747f91a385688d4b449687d29b691eff7068a4602fe6dc4"}, + {file = "matplotlib-3.9.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:0a361bd5583bf0bcc08841df3c10269617ee2a36b99ac39d455a767da908bbbc"}, + {file = "matplotlib-3.9.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e14485bb1b83eeb3d55b6878f9560240981e7bbc7a8d4e1e8c38b9bd6ec8d2de"}, + {file = "matplotlib-3.9.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4a8d279f78844aad213c4935c18f8292a9432d51af2d88bca99072c903948045"}, + {file = "matplotlib-3.9.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b6c12514329ac0d03128cf1dcceb335f4fbf7c11da98bca68dca8dcb983153a9"}, + {file = "matplotlib-3.9.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6e9de2b390d253a508dd497e9b5579f3a851f208763ed67fdca5dc0c3ea6849c"}, + {file = "matplotlib-3.9.3-cp312-cp312-win_amd64.whl", hash = "sha256:d796272408f8567ff7eaa00eb2856b3a00524490e47ad505b0b4ca6bb8a7411f"}, + {file = "matplotlib-3.9.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:203d18df84f5288973b2d56de63d4678cc748250026ca9e1ad8f8a0fd8a75d83"}, + {file = "matplotlib-3.9.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b651b0d3642991259109dc0351fc33ad44c624801367bb8307be9bfc35e427ad"}, + {file = "matplotlib-3.9.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:66d7b171fecf96940ce069923a08ba3df33ef542de82c2ff4fe8caa8346fa95a"}, + {file = "matplotlib-3.9.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6be0ba61f6ff2e6b68e4270fb63b6813c9e7dec3d15fc3a93f47480444fd72f0"}, + {file = "matplotlib-3.9.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d6b2e8856dec3a6db1ae51aec85c82223e834b228c1d3228aede87eee2b34f9"}, + {file = 
"matplotlib-3.9.3-cp313-cp313-win_amd64.whl", hash = "sha256:90a85a004fefed9e583597478420bf904bb1a065b0b0ee5b9d8d31b04b0f3f70"}, + {file = "matplotlib-3.9.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3119b2f16de7f7b9212ba76d8fe6a0e9f90b27a1e04683cd89833a991682f639"}, + {file = "matplotlib-3.9.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:87ad73763d93add1b6c1f9fcd33af662fd62ed70e620c52fcb79f3ac427cf3a6"}, + {file = "matplotlib-3.9.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:026bdf3137ab6022c866efa4813b6bbeddc2ed4c9e7e02f0e323a7bca380dfa0"}, + {file = "matplotlib-3.9.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:760a5e89ebbb172989e8273024a1024b0f084510b9105261b3b00c15e9c9f006"}, + {file = "matplotlib-3.9.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a42b9dc42de2cfe357efa27d9c50c7833fc5ab9b2eb7252ccd5d5f836a84e1e4"}, + {file = "matplotlib-3.9.3-cp313-cp313t-win_amd64.whl", hash = "sha256:e0fcb7da73fbf67b5f4bdaa57d85bb585a4e913d4a10f3e15b32baea56a67f0a"}, + {file = "matplotlib-3.9.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:031b7f5b8e595cc07def77ec5b58464e9bb67dc5760be5d6f26d9da24892481d"}, + {file = "matplotlib-3.9.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9fa6e193c14d6944e0685cdb527cb6b38b0e4a518043e7212f214113af7391da"}, + {file = "matplotlib-3.9.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e6eefae6effa0c35bbbc18c25ee6e0b1da44d2359c3cd526eb0c9e703cf055d"}, + {file = "matplotlib-3.9.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10d3e5c7a99bd28afb957e1ae661323b0800d75b419f24d041ed1cc5d844a764"}, + {file = "matplotlib-3.9.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:816a966d5d376bf24c92af8f379e78e67278833e4c7cbc9fa41872eec629a060"}, + {file = "matplotlib-3.9.3-cp39-cp39-win_amd64.whl", hash = "sha256:3fb0b37c896172899a4a93d9442ffdc6f870165f59e05ce2e07c6fded1c15749"}, + {file = "matplotlib-3.9.3-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:5f2a4ea08e6876206d511365b0bc234edc813d90b930be72c3011bbd7898796f"}, + {file = "matplotlib-3.9.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:9b081dac96ab19c54fd8558fac17c9d2c9cb5cc4656e7ed3261ddc927ba3e2c5"}, + {file = "matplotlib-3.9.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a0a63cb8404d1d1f94968ef35738900038137dab8af836b6c21bb6f03d75465"}, + {file = "matplotlib-3.9.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:896774766fd6be4571a43bc2fcbcb1dcca0807e53cab4a5bf88c4aa861a08e12"}, + {file = "matplotlib-3.9.3.tar.gz", hash = "sha256:cd5dbbc8e25cad5f706845c4d100e2c8b34691b412b93717ce38d8ae803bcfa5"}, ] [package.dependencies] @@ -2845,18 +2793,7 @@ pyparsing = ">=2.3.1" python-dateutil = ">=2.7" [package.extras] -dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setuptools (>=64)", "setuptools_scm (>=7)"] - -[[package]] -name = "mccabe" -version = "0.7.0" -description = "McCabe checker, plugin for flake8" -optional = false -python-versions = ">=3.6" -files = [ - {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, - {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, -] +dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6,!=2.13.3)", "setuptools (>=64)", "setuptools_scm (>=7)"] [[package]] name = "monotonic" @@ -3173,13 +3110,13 @@ 
httpx = ">=0.27.0,<0.28.0" [[package]] name = "openai" -version = "1.54.3" +version = "1.56.1" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.54.3-py3-none-any.whl", hash = "sha256:f18dbaf09c50d70c4185b892a2a553f80681d1d866323a2da7f7be2f688615d5"}, - {file = "openai-1.54.3.tar.gz", hash = "sha256:7511b74eeb894ac0b0253dc71f087a15d2e4d71d22d0088767205143d880cca6"}, + {file = "openai-1.56.1-py3-none-any.whl", hash = "sha256:38e61183c2a98fedebbbb04a909a052d9f897358b070483fc0caff17300a227c"}, + {file = "openai-1.56.1.tar.gz", hash = "sha256:8b0449f22a0c318441eae8a8a789753c3b2cac86542be51ca45df788e26aa180"}, ] [package.dependencies] @@ -3350,18 +3287,17 @@ files = [ [[package]] name = "patsy" -version = "0.5.6" +version = "1.0.1" description = "A Python package for describing statistical models and for building design matrices." optional = true -python-versions = "*" +python-versions = ">=3.6" files = [ - {file = "patsy-0.5.6-py2.py3-none-any.whl", hash = "sha256:19056886fd8fa71863fa32f0eb090267f21fb74be00f19f5c70b2e9d76c883c6"}, - {file = "patsy-0.5.6.tar.gz", hash = "sha256:95c6d47a7222535f84bff7f63d7303f2e297747a598db89cf5c67f0c0c7d2cdb"}, + {file = "patsy-1.0.1-py2.py3-none-any.whl", hash = "sha256:751fb38f9e97e62312e921a1954b81e1bb2bcda4f5eeabaf94db251ee791509c"}, + {file = "patsy-1.0.1.tar.gz", hash = "sha256:e786a9391eec818c054e359b737bbce692f051aee4c661f4141cc88fb459c0c4"}, ] [package.dependencies] numpy = ">=1.4" -six = "*" [package.extras] test = ["pytest", "pytest-cov", "scipy"] @@ -3535,13 +3471,13 @@ strenum = {version = ">=0.4.9,<0.5.0", markers = "python_version < \"3.11\""} [[package]] name = "posthog" -version = "3.7.0" +version = "3.7.4" description = "Integrate PostHog into any python application." 
optional = false python-versions = "*" files = [ - {file = "posthog-3.7.0-py2.py3-none-any.whl", hash = "sha256:3555161c3a9557b5666f96d8e1f17f410ea0f07db56e399e336a1656d4e5c722"}, - {file = "posthog-3.7.0.tar.gz", hash = "sha256:b095d4354ba23f8b346ab5daed8ecfc5108772f922006982dfe8b2d29ebc6e0e"}, + {file = "posthog-3.7.4-py2.py3-none-any.whl", hash = "sha256:21c18c6bf43b2de303ea4cd6e95804cc0f24c20cb2a96a8fd09da2ed50b62faa"}, + {file = "posthog-3.7.4.tar.gz", hash = "sha256:19384bd09d330f9787a7e2446aba14c8057ece56144970ea2791072d4e40cd36"}, ] [package.dependencies] @@ -3640,109 +3576,93 @@ virtualenv = ">=20.10.0" [[package]] name = "propcache" -version = "0.2.0" +version = "0.2.1" description = "Accelerated property cache" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, - {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, - {file = "propcache-0.2.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:33ac8f098df0585c0b53009f039dfd913b38c1d2edafed0cedcc0c32a05aa110"}, - {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97e48e8875e6c13909c800fa344cd54cc4b2b0db1d5f911f840458a500fde2c2"}, - {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:388f3217649d6d59292b722d940d4d2e1e6a7003259eb835724092a1cca0203a"}, - {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f571aea50ba5623c308aa146eb650eebf7dbe0fd8c5d946e28343cb3b5aad577"}, - {file = "propcache-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3dfafb44f7bb35c0c06eda6b2ab4bfd58f02729e7c4045e179f9a861b07c9850"}, - {file = "propcache-0.2.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a3ebe9a75be7ab0b7da2464a77bb27febcb4fab46a34f9288f39d74833db7f61"}, - {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d2f0d0f976985f85dfb5f3d685697ef769faa6b71993b46b295cdbbd6be8cc37"}, - {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a3dc1a4b165283bd865e8f8cb5f0c64c05001e0718ed06250d8cac9bec115b48"}, - {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:9e0f07b42d2a50c7dd2d8675d50f7343d998c64008f1da5fef888396b7f84630"}, - {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e63e3e1e0271f374ed489ff5ee73d4b6e7c60710e1f76af5f0e1a6117cd26394"}, - {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:56bb5c98f058a41bb58eead194b4db8c05b088c93d94d5161728515bd52b052b"}, - {file = "propcache-0.2.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7665f04d0c7f26ff8bb534e1c65068409bf4687aa2534faf7104d7182debb336"}, - {file = "propcache-0.2.0-cp310-cp310-win32.whl", hash = "sha256:7cf18abf9764746b9c8704774d8b06714bcb0a63641518a3a89c7f85cc02c2ad"}, - {file = "propcache-0.2.0-cp310-cp310-win_amd64.whl", hash = "sha256:cfac69017ef97db2438efb854edf24f5a29fd09a536ff3a992b75990720cdc99"}, - {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:63f13bf09cc3336eb04a837490b8f332e0db41da66995c9fd1ba04552e516354"}, - {file = "propcache-0.2.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = 
"sha256:608cce1da6f2672a56b24a015b42db4ac612ee709f3d29f27a00c943d9e851de"}, - {file = "propcache-0.2.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:466c219deee4536fbc83c08d09115249db301550625c7fef1c5563a584c9bc87"}, - {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fc2db02409338bf36590aa985a461b2c96fce91f8e7e0f14c50c5fcc4f229016"}, - {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a6ed8db0a556343d566a5c124ee483ae113acc9a557a807d439bcecc44e7dfbb"}, - {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:91997d9cb4a325b60d4e3f20967f8eb08dfcb32b22554d5ef78e6fd1dda743a2"}, - {file = "propcache-0.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c7dde9e533c0a49d802b4f3f218fa9ad0a1ce21f2c2eb80d5216565202acab4"}, - {file = "propcache-0.2.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffcad6c564fe6b9b8916c1aefbb37a362deebf9394bd2974e9d84232e3e08504"}, - {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:97a58a28bcf63284e8b4d7b460cbee1edaab24634e82059c7b8c09e65284f178"}, - {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:945db8ee295d3af9dbdbb698cce9bbc5c59b5c3fe328bbc4387f59a8a35f998d"}, - {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:39e104da444a34830751715f45ef9fc537475ba21b7f1f5b0f4d71a3b60d7fe2"}, - {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:c5ecca8f9bab618340c8e848d340baf68bcd8ad90a8ecd7a4524a81c1764b3db"}, - {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c436130cc779806bdf5d5fae0d848713105472b8566b75ff70048c47d3961c5b"}, - {file = "propcache-0.2.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:191db28dc6dcd29d1a3e063c3be0b40688ed76434622c53a284e5427565bbd9b"}, - {file = "propcache-0.2.0-cp311-cp311-win32.whl", hash = "sha256:5f2564ec89058ee7c7989a7b719115bdfe2a2fb8e7a4543b8d1c0cc4cf6478c1"}, - {file = "propcache-0.2.0-cp311-cp311-win_amd64.whl", hash = "sha256:6e2e54267980349b723cff366d1e29b138b9a60fa376664a157a342689553f71"}, - {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:2ee7606193fb267be4b2e3b32714f2d58cad27217638db98a60f9efb5efeccc2"}, - {file = "propcache-0.2.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:91ee8fc02ca52e24bcb77b234f22afc03288e1dafbb1f88fe24db308910c4ac7"}, - {file = "propcache-0.2.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e900bad2a8456d00a113cad8c13343f3b1f327534e3589acc2219729237a2e8"}, - {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f52a68c21363c45297aca15561812d542f8fc683c85201df0bebe209e349f793"}, - {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1e41d67757ff4fbc8ef2af99b338bfb955010444b92929e9e55a6d4dcc3c4f09"}, - {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a64e32f8bd94c105cc27f42d3b658902b5bcc947ece3c8fe7bc1b05982f60e89"}, - {file = "propcache-0.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:55346705687dbd7ef0d77883ab4f6fabc48232f587925bdaf95219bae072491e"}, - {file = "propcache-0.2.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:00181262b17e517df2cd85656fcd6b4e70946fe62cd625b9d74ac9977b64d8d9"}, - {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6994984550eaf25dd7fc7bd1b700ff45c894149341725bb4edc67f0ffa94efa4"}, - {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:56295eb1e5f3aecd516d91b00cfd8bf3a13991de5a479df9e27dd569ea23959c"}, - {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:439e76255daa0f8151d3cb325f6dd4a3e93043e6403e6491813bcaaaa8733887"}, - {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f6475a1b2ecb310c98c28d271a30df74f9dd436ee46d09236a6b750a7599ce57"}, - {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3444cdba6628accf384e349014084b1cacd866fbb88433cd9d279d90a54e0b23"}, - {file = "propcache-0.2.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:4a9d9b4d0a9b38d1c391bb4ad24aa65f306c6f01b512e10a8a34a2dc5675d348"}, - {file = "propcache-0.2.0-cp312-cp312-win32.whl", hash = "sha256:69d3a98eebae99a420d4b28756c8ce6ea5a29291baf2dc9ff9414b42676f61d5"}, - {file = "propcache-0.2.0-cp312-cp312-win_amd64.whl", hash = "sha256:ad9c9b99b05f163109466638bd30ada1722abb01bbb85c739c50b6dc11f92dc3"}, - {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ecddc221a077a8132cf7c747d5352a15ed763b674c0448d811f408bf803d9ad7"}, - {file = "propcache-0.2.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0e53cb83fdd61cbd67202735e6a6687a7b491c8742dfc39c9e01e80354956763"}, - {file = "propcache-0.2.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:92fe151145a990c22cbccf9ae15cae8ae9eddabfc949a219c9f667877e40853d"}, - {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d6a21ef516d36909931a2967621eecb256018aeb11fc48656e3257e73e2e247a"}, - {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3f88a4095e913f98988f5b338c1d4d5d07dbb0b6bad19892fd447484e483ba6b"}, - {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5a5b3bb545ead161be780ee85a2b54fdf7092815995661947812dde94a40f6fb"}, - {file = "propcache-0.2.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67aeb72e0f482709991aa91345a831d0b707d16b0257e8ef88a2ad246a7280bf"}, - {file = "propcache-0.2.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c997f8c44ec9b9b0bcbf2d422cc00a1d9b9c681f56efa6ca149a941e5560da2"}, - {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2a66df3d4992bc1d725b9aa803e8c5a66c010c65c741ad901e260ece77f58d2f"}, - {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:3ebbcf2a07621f29638799828b8d8668c421bfb94c6cb04269130d8de4fb7136"}, - {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1235c01ddaa80da8235741e80815ce381c5267f96cc49b1477fdcf8c047ef325"}, - {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3947483a381259c06921612550867b37d22e1df6d6d7e8361264b6d037595f44"}, - {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:d5bed7f9805cc29c780f3aee05de3262ee7ce1f47083cfe9f77471e9d6777e83"}, - {file = "propcache-0.2.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e4a91d44379f45f5e540971d41e4626dacd7f01004826a18cb048e7da7e96544"}, - {file = "propcache-0.2.0-cp313-cp313-win32.whl", hash = 
"sha256:f902804113e032e2cdf8c71015651c97af6418363bea8d78dc0911d56c335032"}, - {file = "propcache-0.2.0-cp313-cp313-win_amd64.whl", hash = "sha256:8f188cfcc64fb1266f4684206c9de0e80f54622c3f22a910cbd200478aeae61e"}, - {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:53d1bd3f979ed529f0805dd35ddaca330f80a9a6d90bc0121d2ff398f8ed8861"}, - {file = "propcache-0.2.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:83928404adf8fb3d26793665633ea79b7361efa0287dfbd372a7e74311d51ee6"}, - {file = "propcache-0.2.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77a86c261679ea5f3896ec060be9dc8e365788248cc1e049632a1be682442063"}, - {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:218db2a3c297a3768c11a34812e63b3ac1c3234c3a086def9c0fee50d35add1f"}, - {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7735e82e3498c27bcb2d17cb65d62c14f1100b71723b68362872bca7d0913d90"}, - {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:20a617c776f520c3875cf4511e0d1db847a076d720714ae35ffe0df3e440be68"}, - {file = "propcache-0.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67b69535c870670c9f9b14a75d28baa32221d06f6b6fa6f77a0a13c5a7b0a5b9"}, - {file = "propcache-0.2.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4569158070180c3855e9c0791c56be3ceeb192defa2cdf6a3f39e54319e56b89"}, - {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:db47514ffdbd91ccdc7e6f8407aac4ee94cc871b15b577c1c324236b013ddd04"}, - {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_armv7l.whl", hash = "sha256:2a60ad3e2553a74168d275a0ef35e8c0a965448ffbc3b300ab3a5bb9956c2162"}, - {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:662dd62358bdeaca0aee5761de8727cfd6861432e3bb828dc2a693aa0471a563"}, - {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:25a1f88b471b3bc911d18b935ecb7115dff3a192b6fef46f0bfaf71ff4f12418"}, - {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:f60f0ac7005b9f5a6091009b09a419ace1610e163fa5deaba5ce3484341840e7"}, - {file = "propcache-0.2.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:74acd6e291f885678631b7ebc85d2d4aec458dd849b8c841b57ef04047833bed"}, - {file = "propcache-0.2.0-cp38-cp38-win32.whl", hash = "sha256:d9b6ddac6408194e934002a69bcaadbc88c10b5f38fb9307779d1c629181815d"}, - {file = "propcache-0.2.0-cp38-cp38-win_amd64.whl", hash = "sha256:676135dcf3262c9c5081cc8f19ad55c8a64e3f7282a21266d05544450bffc3a5"}, - {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:25c8d773a62ce0451b020c7b29a35cfbc05de8b291163a7a0f3b7904f27253e6"}, - {file = "propcache-0.2.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:375a12d7556d462dc64d70475a9ee5982465fbb3d2b364f16b86ba9135793638"}, - {file = "propcache-0.2.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1ec43d76b9677637a89d6ab86e1fef70d739217fefa208c65352ecf0282be957"}, - {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f45eec587dafd4b2d41ac189c2156461ebd0c1082d2fe7013571598abb8505d1"}, - {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc092ba439d91df90aea38168e11f75c655880c12782facf5cf9c00f3d42b562"}, - {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:fa1076244f54bb76e65e22cb6910365779d5c3d71d1f18b275f1dfc7b0d71b4d"}, - {file = "propcache-0.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:682a7c79a2fbf40f5dbb1eb6bfe2cd865376deeac65acf9beb607505dced9e12"}, - {file = "propcache-0.2.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8e40876731f99b6f3c897b66b803c9e1c07a989b366c6b5b475fafd1f7ba3fb8"}, - {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:363ea8cd3c5cb6679f1c2f5f1f9669587361c062e4899fce56758efa928728f8"}, - {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:140fbf08ab3588b3468932974a9331aff43c0ab8a2ec2c608b6d7d1756dbb6cb"}, - {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:e70fac33e8b4ac63dfc4c956fd7d85a0b1139adcfc0d964ce288b7c527537fea"}, - {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:b33d7a286c0dc1a15f5fc864cc48ae92a846df287ceac2dd499926c3801054a6"}, - {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:f6d5749fdd33d90e34c2efb174c7e236829147a2713334d708746e94c4bde40d"}, - {file = "propcache-0.2.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22aa8f2272d81d9317ff5756bb108021a056805ce63dd3630e27d042c8092798"}, - {file = "propcache-0.2.0-cp39-cp39-win32.whl", hash = "sha256:73e4b40ea0eda421b115248d7e79b59214411109a5bc47d0d48e4c73e3b8fcf9"}, - {file = "propcache-0.2.0-cp39-cp39-win_amd64.whl", hash = "sha256:9517d5e9e0731957468c29dbfd0f976736a0e55afaea843726e887f36fe017df"}, - {file = "propcache-0.2.0-py3-none-any.whl", hash = "sha256:2ccc28197af5313706511fab3a8b66dcd6da067a1331372c82ea1cb74285e036"}, - {file = "propcache-0.2.0.tar.gz", hash = "sha256:df81779732feb9d01e5d513fad0122efb3d53bbc75f61b2a4f29a020bc985e70"}, + {file = "propcache-0.2.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6b3f39a85d671436ee3d12c017f8fdea38509e4f25b28eb25877293c98c243f6"}, + {file = "propcache-0.2.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:39d51fbe4285d5db5d92a929e3e21536ea3dd43732c5b177c7ef03f918dff9f2"}, + {file = "propcache-0.2.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6445804cf4ec763dc70de65a3b0d9954e868609e83850a47ca4f0cb64bd79fea"}, + {file = "propcache-0.2.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9479aa06a793c5aeba49ce5c5692ffb51fcd9a7016e017d555d5e2b0045d212"}, + {file = "propcache-0.2.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d9631c5e8b5b3a0fda99cb0d29c18133bca1e18aea9effe55adb3da1adef80d3"}, + {file = "propcache-0.2.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3156628250f46a0895f1f36e1d4fbe062a1af8718ec3ebeb746f1d23f0c5dc4d"}, + {file = "propcache-0.2.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b6fb63ae352e13748289f04f37868099e69dba4c2b3e271c46061e82c745634"}, + {file = "propcache-0.2.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:887d9b0a65404929641a9fabb6452b07fe4572b269d901d622d8a34a4e9043b2"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a96dc1fa45bd8c407a0af03b2d5218392729e1822b0c32e62c5bf7eeb5fb3958"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:a7e65eb5c003a303b94aa2c3852ef130230ec79e349632d030e9571b87c4698c"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_i686.whl", hash = 
"sha256:999779addc413181912e984b942fbcc951be1f5b3663cd80b2687758f434c583"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:19a0f89a7bb9d8048d9c4370c9c543c396e894c76be5525f5e1ad287f1750ddf"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:1ac2f5fe02fa75f56e1ad473f1175e11f475606ec9bd0be2e78e4734ad575034"}, + {file = "propcache-0.2.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:574faa3b79e8ebac7cb1d7930f51184ba1ccf69adfdec53a12f319a06030a68b"}, + {file = "propcache-0.2.1-cp310-cp310-win32.whl", hash = "sha256:03ff9d3f665769b2a85e6157ac8b439644f2d7fd17615a82fa55739bc97863f4"}, + {file = "propcache-0.2.1-cp310-cp310-win_amd64.whl", hash = "sha256:2d3af2e79991102678f53e0dbf4c35de99b6b8b58f29a27ca0325816364caaba"}, + {file = "propcache-0.2.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1ffc3cca89bb438fb9c95c13fc874012f7b9466b89328c3c8b1aa93cdcfadd16"}, + {file = "propcache-0.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f174bbd484294ed9fdf09437f889f95807e5f229d5d93588d34e92106fbf6717"}, + {file = "propcache-0.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:70693319e0b8fd35dd863e3e29513875eb15c51945bf32519ef52927ca883bc3"}, + {file = "propcache-0.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b480c6a4e1138e1aa137c0079b9b6305ec6dcc1098a8ca5196283e8a49df95a9"}, + {file = "propcache-0.2.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d27b84d5880f6d8aa9ae3edb253c59d9f6642ffbb2c889b78b60361eed449787"}, + {file = "propcache-0.2.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:857112b22acd417c40fa4595db2fe28ab900c8c5fe4670c7989b1c0230955465"}, + {file = "propcache-0.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cf6c4150f8c0e32d241436526f3c3f9cbd34429492abddbada2ffcff506c51af"}, + {file = "propcache-0.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66d4cfda1d8ed687daa4bc0274fcfd5267873db9a5bc0418c2da19273040eeb7"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:c2f992c07c0fca81655066705beae35fc95a2fa7366467366db627d9f2ee097f"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:4a571d97dbe66ef38e472703067021b1467025ec85707d57e78711c085984e54"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:bb6178c241278d5fe853b3de743087be7f5f4c6f7d6d22a3b524d323eecec505"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:ad1af54a62ffe39cf34db1aa6ed1a1873bd548f6401db39d8e7cd060b9211f82"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:e7048abd75fe40712005bcfc06bb44b9dfcd8e101dda2ecf2f5aa46115ad07ca"}, + {file = "propcache-0.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:160291c60081f23ee43d44b08a7e5fb76681221a8e10b3139618c5a9a291b84e"}, + {file = "propcache-0.2.1-cp311-cp311-win32.whl", hash = "sha256:819ce3b883b7576ca28da3861c7e1a88afd08cc8c96908e08a3f4dd64a228034"}, + {file = "propcache-0.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:edc9fc7051e3350643ad929df55c451899bb9ae6d24998a949d2e4c87fb596d3"}, + {file = "propcache-0.2.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:081a430aa8d5e8876c6909b67bd2d937bfd531b0382d3fdedb82612c618bc41a"}, + {file = "propcache-0.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:d2ccec9ac47cf4e04897619c0e0c1a48c54a71bdf045117d3a26f80d38ab1fb0"}, + {file = "propcache-0.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:14d86fe14b7e04fa306e0c43cdbeebe6b2c2156a0c9ce56b815faacc193e320d"}, + {file = "propcache-0.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:049324ee97bb67285b49632132db351b41e77833678432be52bdd0289c0e05e4"}, + {file = "propcache-0.2.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1cd9a1d071158de1cc1c71a26014dcdfa7dd3d5f4f88c298c7f90ad6f27bb46d"}, + {file = "propcache-0.2.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:98110aa363f1bb4c073e8dcfaefd3a5cea0f0834c2aab23dda657e4dab2f53b5"}, + {file = "propcache-0.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:647894f5ae99c4cf6bb82a1bb3a796f6e06af3caa3d32e26d2350d0e3e3faf24"}, + {file = "propcache-0.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bfd3223c15bebe26518d58ccf9a39b93948d3dcb3e57a20480dfdd315356baff"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d71264a80f3fcf512eb4f18f59423fe82d6e346ee97b90625f283df56aee103f"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:e73091191e4280403bde6c9a52a6999d69cdfde498f1fdf629105247599b57ec"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:3935bfa5fede35fb202c4b569bb9c042f337ca4ff7bd540a0aa5e37131659348"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:f508b0491767bb1f2b87fdfacaba5f7eddc2f867740ec69ece6d1946d29029a6"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:1672137af7c46662a1c2be1e8dc78cb6d224319aaa40271c9257d886be4363a6"}, + {file = "propcache-0.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b74c261802d3d2b85c9df2dfb2fa81b6f90deeef63c2db9f0e029a3cac50b518"}, + {file = "propcache-0.2.1-cp312-cp312-win32.whl", hash = "sha256:d09c333d36c1409d56a9d29b3a1b800a42c76a57a5a8907eacdbce3f18768246"}, + {file = "propcache-0.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:c214999039d4f2a5b2073ac506bba279945233da8c786e490d411dfc30f855c1"}, + {file = "propcache-0.2.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aca405706e0b0a44cc6bfd41fbe89919a6a56999157f6de7e182a990c36e37bc"}, + {file = "propcache-0.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:12d1083f001ace206fe34b6bdc2cb94be66d57a850866f0b908972f90996b3e9"}, + {file = "propcache-0.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d93f3307ad32a27bda2e88ec81134b823c240aa3abb55821a8da553eed8d9439"}, + {file = "propcache-0.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba278acf14471d36316159c94a802933d10b6a1e117b8554fe0d0d9b75c9d536"}, + {file = "propcache-0.2.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4e6281aedfca15301c41f74d7005e6e3f4ca143584ba696ac69df4f02f40d629"}, + {file = "propcache-0.2.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b750a8e5a1262434fb1517ddf64b5de58327f1adc3524a5e44c2ca43305eb0b"}, + {file = "propcache-0.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf72af5e0fb40e9babf594308911436c8efde3cb5e75b6f206c34ad18be5c052"}, + {file = "propcache-0.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:b2d0a12018b04f4cb820781ec0dffb5f7c7c1d2a5cd22bff7fb055a2cb19ebce"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e800776a79a5aabdb17dcc2346a7d66d0777e942e4cd251defeb084762ecd17d"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:4160d9283bd382fa6c0c2b5e017acc95bc183570cd70968b9202ad6d8fc48dce"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:30b43e74f1359353341a7adb783c8f1b1c676367b011709f466f42fda2045e95"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:58791550b27d5488b1bb52bc96328456095d96206a250d28d874fafe11b3dfaf"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:0f022d381747f0dfe27e99d928e31bc51a18b65bb9e481ae0af1380a6725dd1f"}, + {file = "propcache-0.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:297878dc9d0a334358f9b608b56d02e72899f3b8499fc6044133f0d319e2ec30"}, + {file = "propcache-0.2.1-cp313-cp313-win32.whl", hash = "sha256:ddfab44e4489bd79bda09d84c430677fc7f0a4939a73d2bba3073036f487a0a6"}, + {file = "propcache-0.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:556fc6c10989f19a179e4321e5d678db8eb2924131e64652a51fe83e4c3db0e1"}, + {file = "propcache-0.2.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:6a9a8c34fb7bb609419a211e59da8887eeca40d300b5ea8e56af98f6fbbb1541"}, + {file = "propcache-0.2.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ae1aa1cd222c6d205853b3013c69cd04515f9d6ab6de4b0603e2e1c33221303e"}, + {file = "propcache-0.2.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:accb6150ce61c9c4b7738d45550806aa2b71c7668c6942f17b0ac182b6142fd4"}, + {file = "propcache-0.2.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5eee736daafa7af6d0a2dc15cc75e05c64f37fc37bafef2e00d77c14171c2097"}, + {file = "propcache-0.2.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f7a31fc1e1bd362874863fdeed71aed92d348f5336fd84f2197ba40c59f061bd"}, + {file = "propcache-0.2.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cba4cfa1052819d16699e1d55d18c92b6e094d4517c41dd231a8b9f87b6fa681"}, + {file = "propcache-0.2.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f089118d584e859c62b3da0892b88a83d611c2033ac410e929cb6754eec0ed16"}, + {file = "propcache-0.2.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:781e65134efaf88feb447e8c97a51772aa75e48b794352f94cb7ea717dedda0d"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31f5af773530fd3c658b32b6bdc2d0838543de70eb9a2156c03e410f7b0d3aae"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:a7a078f5d37bee6690959c813977da5291b24286e7b962e62a94cec31aa5188b"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:cea7daf9fc7ae6687cf1e2c049752f19f146fdc37c2cc376e7d0032cf4f25347"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:8b3489ff1ed1e8315674d0775dc7d2195fb13ca17b3808721b54dbe9fd020faf"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:9403db39be1393618dd80c746cb22ccda168efce239c73af13c3763ef56ffc04"}, + {file = "propcache-0.2.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5d97151bc92d2b2578ff7ce779cdb9174337390a535953cbb9452fb65164c587"}, + {file = "propcache-0.2.1-cp39-cp39-win32.whl", hash = 
"sha256:9caac6b54914bdf41bcc91e7eb9147d331d29235a7c967c150ef5df6464fd1bb"}, + {file = "propcache-0.2.1-cp39-cp39-win_amd64.whl", hash = "sha256:92fc4500fcb33899b05ba73276dfb684a20d31caa567b7cb5252d48f896a91b1"}, + {file = "propcache-0.2.1-py3-none-any.whl", hash = "sha256:52277518d6aae65536e9cea52d4e7fd2f7a66f4aa2d30ed3f2fcea620ace3c54"}, + {file = "propcache-0.2.1.tar.gz", hash = "sha256:3f77ce728b19cb537714499928fe800c3dda29e8d9428778fc7c186da4c09a64"}, ] [[package]] @@ -3944,17 +3864,6 @@ files = [ {file = "psycopg2_binary-2.9.10-cp39-cp39-win_amd64.whl", hash = "sha256:30e34c4e97964805f715206c7b789d54a78b70f3ff19fbe590104b71c45600e5"}, ] -[[package]] -name = "pycodestyle" -version = "2.11.1" -description = "Python style guide checker" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, - {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, -] - [[package]] name = "pycparser" version = "2.22" @@ -3968,20 +3877,20 @@ files = [ [[package]] name = "pydantic" -version = "2.9.2" +version = "2.10.3" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12"}, - {file = "pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f"}, + {file = "pydantic-2.10.3-py3-none-any.whl", hash = "sha256:be04d85bbc7b65651c5f8e6b9976ed9c6f41782a55524cef079a34a0bb82144d"}, + {file = "pydantic-2.10.3.tar.gz", hash = "sha256:cb5ac360ce894ceacd69c403187900a02c4b20b693a9dd1d643e1effab9eadf9"}, ] [package.dependencies] annotated-types = ">=0.6.0" email-validator = {version = ">=2.0.0", optional = true, markers = "extra == \"email\""} -pydantic-core = "2.23.4" -typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} +pydantic-core = "2.27.1" +typing-extensions = ">=4.12.2" [package.extras] email = ["email-validator (>=2.0.0)"] @@ -3989,125 +3898,125 @@ timezone = ["tzdata"] [[package]] name = "pydantic-core" -version = "2.23.4" +version = "2.27.1" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.23.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b10bd51f823d891193d4717448fab065733958bdb6a6b351967bd349d48d5c9b"}, - {file = "pydantic_core-2.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4fc714bdbfb534f94034efaa6eadd74e5b93c8fa6315565a222f7b6f42ca1166"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e46b3169866bd62849936de036f901a9356e36376079b05efa83caeaa02ceb"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed1a53de42fbe34853ba90513cea21673481cd81ed1be739f7f2efb931b24916"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cfdd16ab5e59fc31b5e906d1a3f666571abc367598e3e02c83403acabc092e07"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255a8ef062cbf6674450e668482456abac99a5583bbafb73f9ad469540a3a232"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:4a7cd62e831afe623fbb7aabbb4fe583212115b3ef38a9f6b71869ba644624a2"}, - {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f09e2ff1f17c2b51f2bc76d1cc33da96298f0a036a137f5440ab3ec5360b624f"}, - {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e38e63e6f3d1cec5a27e0afe90a085af8b6806ee208b33030e65b6516353f1a3"}, - {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0dbd8dbed2085ed23b5c04afa29d8fd2771674223135dc9bc937f3c09284d071"}, - {file = "pydantic_core-2.23.4-cp310-none-win32.whl", hash = "sha256:6531b7ca5f951d663c339002e91aaebda765ec7d61b7d1e3991051906ddde119"}, - {file = "pydantic_core-2.23.4-cp310-none-win_amd64.whl", hash = "sha256:7c9129eb40958b3d4500fa2467e6a83356b3b61bfff1b414c7361d9220f9ae8f"}, - {file = "pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8"}, - {file = "pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e"}, - {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b"}, - {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0"}, - {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64"}, - {file = "pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f"}, - {file = "pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = "sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3"}, - {file = "pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231"}, - {file = "pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36"}, - {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126"}, - {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e"}, - {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24"}, - {file = "pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84"}, - {file = "pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = "sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9"}, - {file = "pydantic_core-2.23.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7530e201d10d7d14abce4fb54cfe5b94a0aefc87da539d0346a484ead376c3cc"}, - {file = "pydantic_core-2.23.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df933278128ea1cd77772673c73954e53a1c95a4fdf41eef97c2b779271bd0bd"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cb3da3fd1b6a5d0279a01877713dbda118a2a4fc6f0d821a57da2e464793f05"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c6dcb030aefb668a2b7009c85b27f90e51e6a3b4d5c9bc4c57631292015b0d"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:696dd8d674d6ce621ab9d45b205df149399e4bb9aa34102c970b721554828510"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2971bb5ffe72cc0f555c13e19b23c85b654dd2a8f7ab493c262071377bfce9f6"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8394d940e5d400d04cad4f75c0598665cbb81aecefaca82ca85bd28264af7f9b"}, - {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dff76e0602ca7d4cdaacc1ac4c005e0ce0dcfe095d5b5259163a80d3a10d327"}, - {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7d32706badfe136888bdea71c0def994644e09fff0bfe47441deaed8e96fdbc6"}, - {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed541d70698978a20eb63d8c5d72f2cc6d7079d9d90f6b50bad07826f1320f5f"}, - {file = "pydantic_core-2.23.4-cp313-none-win32.whl", hash = "sha256:3d5639516376dce1940ea36edf408c554475369f5da2abd45d44621cb616f769"}, - {file = "pydantic_core-2.23.4-cp313-none-win_amd64.whl", hash = "sha256:5a1504ad17ba4210df3a045132a7baeeba5a200e930f57512ee02909fc5c4cb5"}, - {file = "pydantic_core-2.23.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d4488a93b071c04dc20f5cecc3631fc78b9789dd72483ba15d423b5b3689b555"}, - {file = "pydantic_core-2.23.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:81965a16b675b35e1d09dd14df53f190f9129c0202356ed44ab2728b1c905658"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:4ffa2ebd4c8530079140dd2d7f794a9d9a73cbb8e9d59ffe24c63436efa8f271"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:61817945f2fe7d166e75fbfb28004034b48e44878177fc54d81688e7b85a3665"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29d2c342c4bc01b88402d60189f3df065fb0dda3654744d5a165a5288a657368"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5e11661ce0fd30a6790e8bcdf263b9ec5988e95e63cf901972107efc49218b13"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d18368b137c6295db49ce7218b1a9ba15c5bc254c96d7c9f9e924a9bc7825ad"}, - {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec4e55f79b1c4ffb2eecd8a0cfba9955a2588497d96851f4c8f99aa4a1d39b12"}, - {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:374a5e5049eda9e0a44c696c7ade3ff355f06b1fe0bb945ea3cac2bc336478a2"}, - {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5c364564d17da23db1106787675fc7af45f2f7b58b4173bfdd105564e132e6fb"}, - {file = "pydantic_core-2.23.4-cp38-none-win32.whl", hash = "sha256:d7a80d21d613eec45e3d41eb22f8f94ddc758a6c4720842dc74c0581f54993d6"}, - {file = "pydantic_core-2.23.4-cp38-none-win_amd64.whl", hash = "sha256:5f5ff8d839f4566a474a969508fe1c5e59c31c80d9e140566f9a37bba7b8d556"}, - {file = "pydantic_core-2.23.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a4fa4fc04dff799089689f4fd502ce7d59de529fc2f40a2c8836886c03e0175a"}, - {file = "pydantic_core-2.23.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7df63886be5e270da67e0966cf4afbae86069501d35c8c1b3b6c168f42cb36"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcedcd19a557e182628afa1d553c3895a9f825b936415d0dbd3cd0bbcfd29b4b"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f54b118ce5de9ac21c363d9b3caa6c800341e8c47a508787e5868c6b79c9323"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d2f57d3e1379a9525c5ab067b27dbb8a0642fb5d454e17a9ac434f9ce523e3"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de6d1d1b9e5101508cb37ab0d972357cac5235f5c6533d1071964c47139257df"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1278e0d324f6908e872730c9102b0112477a7f7cf88b308e4fc36ce1bdb6d58c"}, - {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a6b5099eeec78827553827f4c6b8615978bb4b6a88e5d9b93eddf8bb6790f55"}, - {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e55541f756f9b3ee346b840103f32779c695a19826a4c442b7954550a0972040"}, - {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a5c7ba8ffb6d6f8f2ab08743be203654bb1aaa8c9dcb09f82ddd34eadb695605"}, - {file = "pydantic_core-2.23.4-cp39-none-win32.whl", hash = "sha256:37b0fe330e4a58d3c58b24d91d1eb102aeec675a3db4c292ec3928ecd892a9a6"}, - {file = "pydantic_core-2.23.4-cp39-none-win_amd64.whl", hash = "sha256:1498bec4c05c9c787bde9125cfdcc63a41004ff167f495063191b863399b1a29"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = 
"sha256:f455ee30a9d61d3e1a15abd5068827773d6e4dc513e795f380cdd59932c782d5"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e90d2e3bd2c3863d48525d297cd143fe541be8bbf6f579504b9712cb6b643ec"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e203fdf807ac7e12ab59ca2bfcabb38c7cf0b33c41efeb00f8e5da1d86af480"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08277a400de01bc72436a0ccd02bdf596631411f592ad985dcee21445bd0068"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f220b0eea5965dec25480b6333c788fb72ce5f9129e8759ef876a1d805d00801"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d06b0c8da4f16d1d1e352134427cb194a0a6e19ad5db9161bf32b2113409e728"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ba1a0996f6c2773bd83e63f18914c1de3c9dd26d55f4ac302a7efe93fb8e7433"}, - {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:78ddaaa81421a29574a682b3179d4cf9e6d405a09b99d93ddcf7e5239c742e21"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:883a91b5dd7d26492ff2f04f40fbb652de40fcc0afe07e8129e8ae779c2110eb"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88ad334a15b32a791ea935af224b9de1bf99bcd62fabf745d5f3442199d86d59"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233710f069d251feb12a56da21e14cca67994eab08362207785cf8c598e74577"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19442362866a753485ba5e4be408964644dd6a09123d9416c54cd49171f50744"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:624e278a7d29b6445e4e813af92af37820fafb6dcc55c012c834f9e26f9aaaef"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5ef8f42bec47f21d07668a043f077d507e5bf4e668d5c6dfe6aaba89de1a5b8"}, - {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:aea443fffa9fbe3af1a9ba721a87f926fe548d32cab71d188a6ede77d0ff244e"}, - {file = "pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863"}, + {file = "pydantic_core-2.27.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:71a5e35c75c021aaf400ac048dacc855f000bdfed91614b4a726f7432f1f3d6a"}, + {file = "pydantic_core-2.27.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f82d068a2d6ecfc6e054726080af69a6764a10015467d7d7b9f66d6ed5afa23b"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:121ceb0e822f79163dd4699e4c54f5ad38b157084d97b34de8b232bcaad70278"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4603137322c18eaf2e06a4495f426aa8d8388940f3c457e7548145011bb68e05"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a33cd6ad9017bbeaa9ed78a2e0752c5e250eafb9534f308e7a5f7849b0b1bfb4"}, + {file = 
"pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:15cc53a3179ba0fcefe1e3ae50beb2784dede4003ad2dfd24f81bba4b23a454f"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45d9c5eb9273aa50999ad6adc6be5e0ecea7e09dbd0d31bd0c65a55a2592ca08"}, + {file = "pydantic_core-2.27.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8bf7b66ce12a2ac52d16f776b31d16d91033150266eb796967a7e4621707e4f6"}, + {file = "pydantic_core-2.27.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:655d7dd86f26cb15ce8a431036f66ce0318648f8853d709b4167786ec2fa4807"}, + {file = "pydantic_core-2.27.1-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:5556470f1a2157031e676f776c2bc20acd34c1990ca5f7e56f1ebf938b9ab57c"}, + {file = "pydantic_core-2.27.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:f69ed81ab24d5a3bd93861c8c4436f54afdf8e8cc421562b0c7504cf3be58206"}, + {file = "pydantic_core-2.27.1-cp310-none-win32.whl", hash = "sha256:f5a823165e6d04ccea61a9f0576f345f8ce40ed533013580e087bd4d7442b52c"}, + {file = "pydantic_core-2.27.1-cp310-none-win_amd64.whl", hash = "sha256:57866a76e0b3823e0b56692d1a0bf722bffb324839bb5b7226a7dbd6c9a40b17"}, + {file = "pydantic_core-2.27.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ac3b20653bdbe160febbea8aa6c079d3df19310d50ac314911ed8cc4eb7f8cb8"}, + {file = "pydantic_core-2.27.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a5a8e19d7c707c4cadb8c18f5f60c843052ae83c20fa7d44f41594c644a1d330"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7f7059ca8d64fea7f238994c97d91f75965216bcbe5f695bb44f354893f11d52"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bed0f8a0eeea9fb72937ba118f9db0cb7e90773462af7962d382445f3005e5a4"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a3cb37038123447cf0f3ea4c74751f6a9d7afef0eb71aa07bf5f652b5e6a132c"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:84286494f6c5d05243456e04223d5a9417d7f443c3b76065e75001beb26f88de"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:acc07b2cfc5b835444b44a9956846b578d27beeacd4b52e45489e93276241025"}, + {file = "pydantic_core-2.27.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4fefee876e07a6e9aad7a8c8c9f85b0cdbe7df52b8a9552307b09050f7512c7e"}, + {file = "pydantic_core-2.27.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:258c57abf1188926c774a4c94dd29237e77eda19462e5bb901d88adcab6af919"}, + {file = "pydantic_core-2.27.1-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:35c14ac45fcfdf7167ca76cc80b2001205a8d5d16d80524e13508371fb8cdd9c"}, + {file = "pydantic_core-2.27.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d1b26e1dff225c31897696cab7d4f0a315d4c0d9e8666dbffdb28216f3b17fdc"}, + {file = "pydantic_core-2.27.1-cp311-none-win32.whl", hash = "sha256:2cdf7d86886bc6982354862204ae3b2f7f96f21a3eb0ba5ca0ac42c7b38598b9"}, + {file = "pydantic_core-2.27.1-cp311-none-win_amd64.whl", hash = "sha256:3af385b0cee8df3746c3f406f38bcbfdc9041b5c2d5ce3e5fc6637256e60bbc5"}, + {file = "pydantic_core-2.27.1-cp311-none-win_arm64.whl", hash = "sha256:81f2ec23ddc1b476ff96563f2e8d723830b06dceae348ce02914a37cb4e74b89"}, + {file = 
"pydantic_core-2.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:9cbd94fc661d2bab2bc702cddd2d3370bbdcc4cd0f8f57488a81bcce90c7a54f"}, + {file = "pydantic_core-2.27.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:5f8c4718cd44ec1580e180cb739713ecda2bdee1341084c1467802a417fe0f02"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15aae984e46de8d376df515f00450d1522077254ef6b7ce189b38ecee7c9677c"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1ba5e3963344ff25fc8c40da90f44b0afca8cfd89d12964feb79ac1411a260ac"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:992cea5f4f3b29d6b4f7f1726ed8ee46c8331c6b4eed6db5b40134c6fe1768bb"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0325336f348dbee6550d129b1627cb8f5351a9dc91aad141ffb96d4937bd9529"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7597c07fbd11515f654d6ece3d0e4e5093edc30a436c63142d9a4b8e22f19c35"}, + {file = "pydantic_core-2.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3bbd5d8cc692616d5ef6fbbbd50dbec142c7e6ad9beb66b78a96e9c16729b089"}, + {file = "pydantic_core-2.27.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:dc61505e73298a84a2f317255fcc72b710b72980f3a1f670447a21efc88f8381"}, + {file = "pydantic_core-2.27.1-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:e1f735dc43da318cad19b4173dd1ffce1d84aafd6c9b782b3abc04a0d5a6f5bb"}, + {file = "pydantic_core-2.27.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:f4e5658dbffe8843a0f12366a4c2d1c316dbe09bb4dfbdc9d2d9cd6031de8aae"}, + {file = "pydantic_core-2.27.1-cp312-none-win32.whl", hash = "sha256:672ebbe820bb37988c4d136eca2652ee114992d5d41c7e4858cdd90ea94ffe5c"}, + {file = "pydantic_core-2.27.1-cp312-none-win_amd64.whl", hash = "sha256:66ff044fd0bb1768688aecbe28b6190f6e799349221fb0de0e6f4048eca14c16"}, + {file = "pydantic_core-2.27.1-cp312-none-win_arm64.whl", hash = "sha256:9a3b0793b1bbfd4146304e23d90045f2a9b5fd5823aa682665fbdaf2a6c28f3e"}, + {file = "pydantic_core-2.27.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f216dbce0e60e4d03e0c4353c7023b202d95cbaeff12e5fd2e82ea0a66905073"}, + {file = "pydantic_core-2.27.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a2e02889071850bbfd36b56fd6bc98945e23670773bc7a76657e90e6b6603c08"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42b0e23f119b2b456d07ca91b307ae167cc3f6c846a7b169fca5326e32fdc6cf"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:764be71193f87d460a03f1f7385a82e226639732214b402f9aa61f0d025f0737"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1c00666a3bd2f84920a4e94434f5974d7bbc57e461318d6bb34ce9cdbbc1f6b2"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3ccaa88b24eebc0f849ce0a4d09e8a408ec5a94afff395eb69baf868f5183107"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c65af9088ac534313e1963443d0ec360bb2b9cba6c2909478d22c2e363d98a51"}, + {file = "pydantic_core-2.27.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:206b5cf6f0c513baffaeae7bd817717140770c74528f3e4c3e1cec7871ddd61a"}, + {file = "pydantic_core-2.27.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:062f60e512fc7fff8b8a9d680ff0ddaaef0193dba9fa83e679c0c5f5fbd018bc"}, + {file = "pydantic_core-2.27.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:a0697803ed7d4af5e4c1adf1670af078f8fcab7a86350e969f454daf598c4960"}, + {file = "pydantic_core-2.27.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:58ca98a950171f3151c603aeea9303ef6c235f692fe555e883591103da709b23"}, + {file = "pydantic_core-2.27.1-cp313-none-win32.whl", hash = "sha256:8065914ff79f7eab1599bd80406681f0ad08f8e47c880f17b416c9f8f7a26d05"}, + {file = "pydantic_core-2.27.1-cp313-none-win_amd64.whl", hash = "sha256:ba630d5e3db74c79300d9a5bdaaf6200172b107f263c98a0539eeecb857b2337"}, + {file = "pydantic_core-2.27.1-cp313-none-win_arm64.whl", hash = "sha256:45cf8588c066860b623cd11c4ba687f8d7175d5f7ef65f7129df8a394c502de5"}, + {file = "pydantic_core-2.27.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:5897bec80a09b4084aee23f9b73a9477a46c3304ad1d2d07acca19723fb1de62"}, + {file = "pydantic_core-2.27.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:d0165ab2914379bd56908c02294ed8405c252250668ebcb438a55494c69f44ab"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6b9af86e1d8e4cfc82c2022bfaa6f459381a50b94a29e95dcdda8442d6d83864"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f6c8a66741c5f5447e047ab0ba7a1c61d1e95580d64bce852e3df1f895c4067"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a42d6a8156ff78981f8aa56eb6394114e0dedb217cf8b729f438f643608cbcd"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:64c65f40b4cd8b0e049a8edde07e38b476da7e3aaebe63287c899d2cff253fa5"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdcf339322a3fae5cbd504edcefddd5a50d9ee00d968696846f089b4432cf78"}, + {file = "pydantic_core-2.27.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:bf99c8404f008750c846cb4ac4667b798a9f7de673ff719d705d9b2d6de49c5f"}, + {file = "pydantic_core-2.27.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:8f1edcea27918d748c7e5e4d917297b2a0ab80cad10f86631e488b7cddf76a36"}, + {file = "pydantic_core-2.27.1-cp38-cp38-musllinux_1_1_armv7l.whl", hash = "sha256:159cac0a3d096f79ab6a44d77a961917219707e2a130739c64d4dd46281f5c2a"}, + {file = "pydantic_core-2.27.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:029d9757eb621cc6e1848fa0b0310310de7301057f623985698ed7ebb014391b"}, + {file = "pydantic_core-2.27.1-cp38-none-win32.whl", hash = "sha256:a28af0695a45f7060e6f9b7092558a928a28553366519f64083c63a44f70e618"}, + {file = "pydantic_core-2.27.1-cp38-none-win_amd64.whl", hash = "sha256:2d4567c850905d5eaaed2f7a404e61012a51caf288292e016360aa2b96ff38d4"}, + {file = "pydantic_core-2.27.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e9386266798d64eeb19dd3677051f5705bf873e98e15897ddb7d76f477131967"}, + {file = "pydantic_core-2.27.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4228b5b646caa73f119b1ae756216b59cc6e2267201c27d3912b592c5e323b60"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0b3dfe500de26c52abe0477dde16192ac39c98f05bf2d80e76102d394bd13854"}, + {file = 
"pydantic_core-2.27.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aee66be87825cdf72ac64cb03ad4c15ffef4143dbf5c113f64a5ff4f81477bf9"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3b748c44bb9f53031c8cbc99a8a061bc181c1000c60a30f55393b6e9c45cc5bd"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ca038c7f6a0afd0b2448941b6ef9d5e1949e999f9e5517692eb6da58e9d44be"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e0bd57539da59a3e4671b90a502da9a28c72322a4f17866ba3ac63a82c4498e"}, + {file = "pydantic_core-2.27.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ac6c2c45c847bbf8f91930d88716a0fb924b51e0c6dad329b793d670ec5db792"}, + {file = "pydantic_core-2.27.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b94d4ba43739bbe8b0ce4262bcc3b7b9f31459ad120fb595627eaeb7f9b9ca01"}, + {file = "pydantic_core-2.27.1-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:00e6424f4b26fe82d44577b4c842d7df97c20be6439e8e685d0d715feceb9fb9"}, + {file = "pydantic_core-2.27.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:38de0a70160dd97540335b7ad3a74571b24f1dc3ed33f815f0880682e6880131"}, + {file = "pydantic_core-2.27.1-cp39-none-win32.whl", hash = "sha256:7ccebf51efc61634f6c2344da73e366c75e735960b5654b63d7e6f69a5885fa3"}, + {file = "pydantic_core-2.27.1-cp39-none-win_amd64.whl", hash = "sha256:a57847b090d7892f123726202b7daa20df6694cbd583b67a592e856bff603d6c"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3fa80ac2bd5856580e242dbc202db873c60a01b20309c8319b5c5986fbe53ce6"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d950caa237bb1954f1b8c9227b5065ba6875ac9771bb8ec790d956a699b78676"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0e4216e64d203e39c62df627aa882f02a2438d18a5f21d7f721621f7a5d3611d"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:02a3d637bd387c41d46b002f0e49c52642281edacd2740e5a42f7017feea3f2c"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:161c27ccce13b6b0c8689418da3885d3220ed2eae2ea5e9b2f7f3d48f1d52c27"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:19910754e4cc9c63bc1c7f6d73aa1cfee82f42007e407c0f413695c2f7ed777f"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:e173486019cc283dc9778315fa29a363579372fe67045e971e89b6365cc035ed"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:af52d26579b308921b73b956153066481f064875140ccd1dfd4e77db89dbb12f"}, + {file = "pydantic_core-2.27.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:981fb88516bd1ae8b0cbbd2034678a39dedc98752f264ac9bc5839d3923fa04c"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5fde892e6c697ce3e30c61b239330fc5d569a71fefd4eb6512fc6caec9dd9e2f"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:816f5aa087094099fff7edabb5e01cc370eb21aa1a1d44fe2d2aefdfb5599b31"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:9c10c309e18e443ddb108f0ef64e8729363adbfd92d6d57beec680f6261556f3"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98476c98b02c8e9b2eec76ac4156fd006628b1b2d0ef27e548ffa978393fd154"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c3027001c28434e7ca5a6e1e527487051136aa81803ac812be51802150d880dd"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:7699b1df36a48169cdebda7ab5a2bac265204003f153b4bd17276153d997670a"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:1c39b07d90be6b48968ddc8c19e7585052088fd7ec8d568bb31ff64c70ae3c97"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:46ccfe3032b3915586e469d4972973f893c0a2bb65669194a5bdea9bacc088c2"}, + {file = "pydantic_core-2.27.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:62ba45e21cf6571d7f716d903b5b7b6d2617e2d5d67c0923dc47b9d41369f840"}, + {file = "pydantic_core-2.27.1.tar.gz", hash = "sha256:62a763352879b84aa31058fc931884055fd75089cccbd9d58bb6afd01141b235"}, ] [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" -[[package]] -name = "pyflakes" -version = "3.1.0" -description = "passive checker of Python programs" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pyflakes-3.1.0-py2.py3-none-any.whl", hash = "sha256:4132f6d49cb4dae6819e5379898f2b8cce3c5f23994194c24b77d5da2e36f774"}, - {file = "pyflakes-3.1.0.tar.gz", hash = "sha256:a0aae034c444db0071aa077972ba4768d40c830d9539fd45bf4cd3f8f6992efc"}, -] - [[package]] name = "pyjwt" -version = "2.9.0" +version = "2.10.1" description = "JSON Web Token implementation in Python" optional = true -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850"}, - {file = "pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c"}, + {file = "PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb"}, + {file = "pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953"}, ] [package.extras] @@ -4189,13 +4098,13 @@ image = ["Pillow"] [[package]] name = "pytest" -version = "8.3.3" +version = "8.3.4" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, - {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, + {file = "pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6"}, + {file = "pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761"}, ] [package.dependencies] @@ -4606,112 +4515,125 @@ requests = ">=2.0.1,<3.0.0" [[package]] name = "rpds-py" -version = "0.21.0" +version = "0.22.1" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.9" files = [ - {file = "rpds_py-0.21.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a017f813f24b9df929674d0332a374d40d7f0162b326562daae8066b502d0590"}, - {file = 
"rpds_py-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:20cc1ed0bcc86d8e1a7e968cce15be45178fd16e2ff656a243145e0b439bd250"}, - {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ad116dda078d0bc4886cb7840e19811562acdc7a8e296ea6ec37e70326c1b41c"}, - {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:808f1ac7cf3b44f81c9475475ceb221f982ef548e44e024ad5f9e7060649540e"}, - {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:de552f4a1916e520f2703ec474d2b4d3f86d41f353e7680b597512ffe7eac5d0"}, - {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:efec946f331349dfc4ae9d0e034c263ddde19414fe5128580f512619abed05f1"}, - {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b80b4690bbff51a034bfde9c9f6bf9357f0a8c61f548942b80f7b66356508bf5"}, - {file = "rpds_py-0.21.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:085ed25baac88953d4283e5b5bd094b155075bb40d07c29c4f073e10623f9f2e"}, - {file = "rpds_py-0.21.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:daa8efac2a1273eed2354397a51216ae1e198ecbce9036fba4e7610b308b6153"}, - {file = "rpds_py-0.21.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:95a5bad1ac8a5c77b4e658671642e4af3707f095d2b78a1fdd08af0dfb647624"}, - {file = "rpds_py-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3e53861b29a13d5b70116ea4230b5f0f3547b2c222c5daa090eb7c9c82d7f664"}, - {file = "rpds_py-0.21.0-cp310-none-win32.whl", hash = "sha256:ea3a6ac4d74820c98fcc9da4a57847ad2cc36475a8bd9683f32ab6d47a2bd682"}, - {file = "rpds_py-0.21.0-cp310-none-win_amd64.whl", hash = "sha256:b8f107395f2f1d151181880b69a2869c69e87ec079c49c0016ab96860b6acbe5"}, - {file = "rpds_py-0.21.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:5555db3e618a77034954b9dc547eae94166391a98eb867905ec8fcbce1308d95"}, - {file = "rpds_py-0.21.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:97ef67d9bbc3e15584c2f3c74bcf064af36336c10d2e21a2131e123ce0f924c9"}, - {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ab2c2a26d2f69cdf833174f4d9d86118edc781ad9a8fa13970b527bf8236027"}, - {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:4e8921a259f54bfbc755c5bbd60c82bb2339ae0324163f32868f63f0ebb873d9"}, - {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a7ff941004d74d55a47f916afc38494bd1cfd4b53c482b77c03147c91ac0ac3"}, - {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5145282a7cd2ac16ea0dc46b82167754d5e103a05614b724457cffe614f25bd8"}, - {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:de609a6f1b682f70bb7163da745ee815d8f230d97276db049ab447767466a09d"}, - {file = "rpds_py-0.21.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:40c91c6e34cf016fa8e6b59d75e3dbe354830777fcfd74c58b279dceb7975b75"}, - {file = "rpds_py-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:d2132377f9deef0c4db89e65e8bb28644ff75a18df5293e132a8d67748397b9f"}, - {file = "rpds_py-0.21.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:0a9e0759e7be10109645a9fddaaad0619d58c9bf30a3f248a2ea57a7c417173a"}, - {file = "rpds_py-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:9e20da3957bdf7824afdd4b6eeb29510e83e026473e04952dca565170cd1ecc8"}, - {file = "rpds_py-0.21.0-cp311-none-win32.whl", hash = "sha256:f71009b0d5e94c0e86533c0b27ed7cacc1239cb51c178fd239c3cfefefb0400a"}, - {file = "rpds_py-0.21.0-cp311-none-win_amd64.whl", hash = "sha256:e168afe6bf6ab7ab46c8c375606298784ecbe3ba31c0980b7dcbb9631dcba97e"}, - {file = "rpds_py-0.21.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:30b912c965b2aa76ba5168fd610087bad7fcde47f0a8367ee8f1876086ee6d1d"}, - {file = "rpds_py-0.21.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ca9989d5d9b1b300bc18e1801c67b9f6d2c66b8fd9621b36072ed1df2c977f72"}, - {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f54e7106f0001244a5f4cf810ba8d3f9c542e2730821b16e969d6887b664266"}, - {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fed5dfefdf384d6fe975cc026886aece4f292feaf69d0eeb716cfd3c5a4dd8be"}, - {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:590ef88db231c9c1eece44dcfefd7515d8bf0d986d64d0caf06a81998a9e8cab"}, - {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f983e4c2f603c95dde63df633eec42955508eefd8d0f0e6d236d31a044c882d7"}, - {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b229ce052ddf1a01c67d68166c19cb004fb3612424921b81c46e7ea7ccf7c3bf"}, - {file = "rpds_py-0.21.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ebf64e281a06c904a7636781d2e973d1f0926a5b8b480ac658dc0f556e7779f4"}, - {file = "rpds_py-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:998a8080c4495e4f72132f3d66ff91f5997d799e86cec6ee05342f8f3cda7dca"}, - {file = "rpds_py-0.21.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:98486337f7b4f3c324ab402e83453e25bb844f44418c066623db88e4c56b7c7b"}, - {file = "rpds_py-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a78d8b634c9df7f8d175451cfeac3810a702ccb85f98ec95797fa98b942cea11"}, - {file = "rpds_py-0.21.0-cp312-none-win32.whl", hash = "sha256:a58ce66847711c4aa2ecfcfaff04cb0327f907fead8945ffc47d9407f41ff952"}, - {file = "rpds_py-0.21.0-cp312-none-win_amd64.whl", hash = "sha256:e860f065cc4ea6f256d6f411aba4b1251255366e48e972f8a347cf88077b24fd"}, - {file = "rpds_py-0.21.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:ee4eafd77cc98d355a0d02f263efc0d3ae3ce4a7c24740010a8b4012bbb24937"}, - {file = "rpds_py-0.21.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:688c93b77e468d72579351a84b95f976bd7b3e84aa6686be6497045ba84be560"}, - {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c38dbf31c57032667dd5a2f0568ccde66e868e8f78d5a0d27dcc56d70f3fcd3b"}, - {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2d6129137f43f7fa02d41542ffff4871d4aefa724a5fe38e2c31a4e0fd343fb0"}, - {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:520ed8b99b0bf86a176271f6fe23024323862ac674b1ce5b02a72bfeff3fff44"}, - {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aaeb25ccfb9b9014a10eaf70904ebf3f79faaa8e60e99e19eef9f478651b9b74"}, - {file = "rpds_py-0.21.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af04ac89c738e0f0f1b913918024c3eab6e3ace989518ea838807177d38a2e94"}, - {file = 
"rpds_py-0.21.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b9b76e2afd585803c53c5b29e992ecd183f68285b62fe2668383a18e74abe7a3"}, - {file = "rpds_py-0.21.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5afb5efde74c54724e1a01118c6e5c15e54e642c42a1ba588ab1f03544ac8c7a"}, - {file = "rpds_py-0.21.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:52c041802a6efa625ea18027a0723676a778869481d16803481ef6cc02ea8cb3"}, - {file = "rpds_py-0.21.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ee1e4fc267b437bb89990b2f2abf6c25765b89b72dd4a11e21934df449e0c976"}, - {file = "rpds_py-0.21.0-cp313-none-win32.whl", hash = "sha256:0c025820b78817db6a76413fff6866790786c38f95ea3f3d3c93dbb73b632202"}, - {file = "rpds_py-0.21.0-cp313-none-win_amd64.whl", hash = "sha256:320c808df533695326610a1b6a0a6e98f033e49de55d7dc36a13c8a30cfa756e"}, - {file = "rpds_py-0.21.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:2c51d99c30091f72a3c5d126fad26236c3f75716b8b5e5cf8effb18889ced928"}, - {file = "rpds_py-0.21.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cbd7504a10b0955ea287114f003b7ad62330c9e65ba012c6223dba646f6ffd05"}, - {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6dcc4949be728ede49e6244eabd04064336012b37f5c2200e8ec8eb2988b209c"}, - {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f414da5c51bf350e4b7960644617c130140423882305f7574b6cf65a3081cecb"}, - {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9afe42102b40007f588666bc7de82451e10c6788f6f70984629db193849dced1"}, - {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b929c2bb6e29ab31f12a1117c39f7e6d6450419ab7464a4ea9b0b417174f044"}, - {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8404b3717da03cbf773a1d275d01fec84ea007754ed380f63dfc24fb76ce4592"}, - {file = "rpds_py-0.21.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e12bb09678f38b7597b8346983d2323a6482dcd59e423d9448108c1be37cac9d"}, - {file = "rpds_py-0.21.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:58a0e345be4b18e6b8501d3b0aa540dad90caeed814c515e5206bb2ec26736fd"}, - {file = "rpds_py-0.21.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c3761f62fcfccf0864cc4665b6e7c3f0c626f0380b41b8bd1ce322103fa3ef87"}, - {file = "rpds_py-0.21.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:c2b2f71c6ad6c2e4fc9ed9401080badd1469fa9889657ec3abea42a3d6b2e1ed"}, - {file = "rpds_py-0.21.0-cp39-none-win32.whl", hash = "sha256:b21747f79f360e790525e6f6438c7569ddbfb1b3197b9e65043f25c3c9b489d8"}, - {file = "rpds_py-0.21.0-cp39-none-win_amd64.whl", hash = "sha256:0626238a43152918f9e72ede9a3b6ccc9e299adc8ade0d67c5e142d564c9a83d"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:6b4ef7725386dc0762857097f6b7266a6cdd62bfd209664da6712cb26acef035"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6bc0e697d4d79ab1aacbf20ee5f0df80359ecf55db33ff41481cf3e24f206919"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da52d62a96e61c1c444f3998c434e8b263c384f6d68aca8274d2e08d1906325c"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:98e4fe5db40db87ce1c65031463a760ec7906ab230ad2249b4572c2fc3ef1f9f"}, - {file = 
"rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:30bdc973f10d28e0337f71d202ff29345320f8bc49a31c90e6c257e1ccef4333"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:faa5e8496c530f9c71f2b4e1c49758b06e5f4055e17144906245c99fa6d45356"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:32eb88c30b6a4f0605508023b7141d043a79b14acb3b969aa0b4f99b25bc7d4a"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a89a8ce9e4e75aeb7fa5d8ad0f3fecdee813802592f4f46a15754dcb2fd6b061"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:241e6c125568493f553c3d0fdbb38c74babf54b45cef86439d4cd97ff8feb34d"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:3b766a9f57663396e4f34f5140b3595b233a7b146e94777b97a8413a1da1be18"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:af4a644bf890f56e41e74be7d34e9511e4954894d544ec6b8efe1e21a1a8da6c"}, - {file = "rpds_py-0.21.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:3e30a69a706e8ea20444b98a49f386c17b26f860aa9245329bab0851ed100677"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:031819f906bb146561af051c7cef4ba2003d28cff07efacef59da973ff7969ba"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:b876f2bc27ab5954e2fd88890c071bd0ed18b9c50f6ec3de3c50a5ece612f7a6"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dc5695c321e518d9f03b7ea6abb5ea3af4567766f9852ad1560f501b17588c7b"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b4de1da871b5c0fd5537b26a6fc6814c3cc05cabe0c941db6e9044ffbb12f04a"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:878f6fea96621fda5303a2867887686d7a198d9e0f8a40be100a63f5d60c88c9"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a8eeec67590e94189f434c6d11c426892e396ae59e4801d17a93ac96b8c02a6c"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ff2eba7f6c0cb523d7e9cff0903f2fe1feff8f0b2ceb6bd71c0e20a4dcee271"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a429b99337062877d7875e4ff1a51fe788424d522bd64a8c0a20ef3021fdb6ed"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d167e4dbbdac48bd58893c7e446684ad5d425b407f9336e04ab52e8b9194e2ed"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:4eb2de8a147ffe0626bfdc275fc6563aa7bf4b6db59cf0d44f0ccd6ca625a24e"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e78868e98f34f34a88e23ee9ccaeeec460e4eaf6db16d51d7a9b883e5e785a5e"}, - {file = "rpds_py-0.21.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:4991ca61656e3160cdaca4851151fd3f4a92e9eba5c7a530ab030d6aee96ec89"}, - {file = "rpds_py-0.21.0.tar.gz", hash = "sha256:ed6378c9d66d0de903763e7706383d60c33829581f0adff47b6535f1802fa6db"}, + {file = "rpds_py-0.22.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:ab27dd4edd84b13309f268ffcdfc07aef8339135ffab7b6d43f16884307a2a48"}, + {file = 
"rpds_py-0.22.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9d5b925156a746dc1f5f52376fdd1fbdd3f6ffe1fcd6f5e06f77ca79abb940a3"}, + {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:201650b309c419143775c15209c620627de3c09a27c7fb58375325aec5cce260"}, + {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:31264187fc934ff1024a4f56775f33c9252d3f4f3e27ec07d1995a26b52702c3"}, + {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:97c5ffe47ccf92d8b17e10f8a5ce28d015aa1196edc3359684cf31504eae6a14"}, + {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e9ac7280bd045f472b50306d7efeee051b69e3a2dd1b90f46bd7e86e63b1efa2"}, + {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5f941fb86195f97be7f6efe04a21b223f05dfe4d1dfb159999e2f8d101e44cc4"}, + {file = "rpds_py-0.22.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f91bfc39f7a64168e08ab831fa497ec5438c1d6c6e2f9e12848d95ad11ac8523"}, + {file = "rpds_py-0.22.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:effcae2152afe7937a28376dbabb25c770ef99ed4e16a4ffeb8e6a4f7c4f06aa"}, + {file = "rpds_py-0.22.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:2177e59c033bf0d1bf7de1ced561205963583caf3242c6c700a723034bfb5f8e"}, + {file = "rpds_py-0.22.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:66f4f48a89cdd30ab3a47335df81c76e9a63799d0d84b29c0618371c66fa37b0"}, + {file = "rpds_py-0.22.1-cp310-cp310-win32.whl", hash = "sha256:b07fa9e634234e84096adfa4be3828c8f26e238679c122824b2b3d7131bec578"}, + {file = "rpds_py-0.22.1-cp310-cp310-win_amd64.whl", hash = "sha256:ca4657e9fd0b1b5376942d403d634ce188f79064f0873aa853ab05b10185ceec"}, + {file = "rpds_py-0.22.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:608c84699b2db09c6a8743845b1a3dad36fae53eaaecb241d45b13dff74405fb"}, + {file = "rpds_py-0.22.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9dae4eb9b5534e09ba6c6ab496a757e5e394b7e7b08767d25ca37e8d36491114"}, + {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09a1f000c5f6e08b298275bae00921e9fbbf2a35dae0a86db2821c058c2201a9"}, + {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:580ccbf11f02f948add4cb641843030a89f1463d7c0740cbfc9aca91e9dc34b3"}, + {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96559e05bdf938b2048353e10a7920b98f853cefe4482c2064a718d7d0a50bd7"}, + {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:128cbaed7ba26116820bcb992405d6a13ea18c8fca1b8c4f59906d858e91e979"}, + {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:734783dd7da58f76222f458346ddebdb3621686a1a2a667db5049caf0c9956b9"}, + {file = "rpds_py-0.22.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c9ce6b83597d45bec44a2690857ede62fc98223772135f8a7fa90884eb726501"}, + {file = "rpds_py-0.22.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bca4428c4a957b78ded3e6e62884ab03f029dce8fa8d34818da0f80f61332b49"}, + {file = "rpds_py-0.22.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:1ded65691a1d3fd7d2aa89d2c91aa51f941601bb2ce099739909034d957fef4b"}, + {file = "rpds_py-0.22.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = 
"sha256:72407065ad459db9f3d052ea8c51e02534f02533fc61e51cbab3bd94166f086c"}, + {file = "rpds_py-0.22.1-cp311-cp311-win32.whl", hash = "sha256:eb013aa01b404219f28dc973d9e6310fd4db216d7299253dd355629952e0564e"}, + {file = "rpds_py-0.22.1-cp311-cp311-win_amd64.whl", hash = "sha256:8bd9ec1db79a664f4cbb12878693b73416f4d2cb425d3e27eccc1bdfbdc826ef"}, + {file = "rpds_py-0.22.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:8ec41049c90d204a6561238a9ad6c7263ebb7009d9759c98b58078d9d2fec9ba"}, + {file = "rpds_py-0.22.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:102be79c4cc47a4aeb5912401185c404cd2601c15a7163bbecff7f1bfe20b669"}, + {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8a603155db408f773637f9e3a712c6e3cbc521aaa8fa2b99f9ba6106c59a2496"}, + {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5dbff9402c2bdf00bf0df9905694b3c292a3847c725651938a72f554351a5fcb"}, + {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:96b3759d8ab2323324e0a92b2f44834f9d88089b8d1ab6f533b61f4be3411cef"}, + {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3029f481b31f329b1fdb4ec4b56935d82210ddd9c6f86ea5a87c06f1e97b161"}, + {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d280b4bf09f719b89fd9aab3b71067acc0d0449b7d1eba99a2ade4939cef8296"}, + {file = "rpds_py-0.22.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6c8e97e19aa7b0b0d801a159f932ce4435f1049c8c38e2bb372bb5bee559ce50"}, + {file = "rpds_py-0.22.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:50e4b5d291105f7063259fe0125b1af902fb34499444d7c5c521dd8328b00939"}, + {file = "rpds_py-0.22.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d3777c446bb1c5fcd82dc3f8776e1a146cd91e80cc1892f8634575ace438d22f"}, + {file = "rpds_py-0.22.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:447ae1104fb32197b9262f772d565d38e834cc2e9edd89350b37b88fed636e70"}, + {file = "rpds_py-0.22.1-cp312-cp312-win32.whl", hash = "sha256:55d371b9d8b0c2a68a50413a8cb01c3c3ce1ea4f768bf77b66669a9a486e101e"}, + {file = "rpds_py-0.22.1-cp312-cp312-win_amd64.whl", hash = "sha256:413a30a99d8683dace3765885920ed27ab662efbb6c98d81db76c397ad1ffd71"}, + {file = "rpds_py-0.22.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:aa2ba0176037c915d8660a4e46581d645e2c22b5373e466bc8640a794d45861a"}, + {file = "rpds_py-0.22.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4ba6c66fbc6015b2f99e7176fec41793cecb00c4cc357cad038dff85e6ac42ab"}, + {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15fa4ca658f8ad22645d3531682b17e5580832efbfa87304c3e62214c79c1e8a"}, + {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d7833ef6f5d6cb634f296abfd93452fb3eb44c4e9a6ae95c1021eab704c1cee2"}, + {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c0467838c90435b80793cde486a318fc916ee57f2af54e4b10c72b20cbdcbaa9"}, + {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d962e2e89b3a95e3597a34b8c93ced1e98958502c5b8096c9fd69deff279f561"}, + {file = "rpds_py-0.22.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ce729f1dc8a4a190c34b69f75377bddc004079b2963ab722ab91fafe040be6d"}, + {file = 
"rpds_py-0.22.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8080467df22feca0fc9c46567001777c6fbc2b4a2683a7137420896051874ca1"}, + {file = "rpds_py-0.22.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0f9eb37d3a60b262a98ab51ee899cac039de9ca0ce68dcf1a6518a09719020b0"}, + {file = "rpds_py-0.22.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:153248f48d6f90a295a502f53ec544a3ffbd21b0bb32f5dca39c4b93a764d6a2"}, + {file = "rpds_py-0.22.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0a53592cdf98cec3dfcdb24ffec8a4797e7656b65700099af43ec7df023b6de4"}, + {file = "rpds_py-0.22.1-cp313-cp313-win32.whl", hash = "sha256:e8056adcefa2dcb67e8bc91ea5eee26df66e8b297a8cd6ff0903f85c70908fa0"}, + {file = "rpds_py-0.22.1-cp313-cp313-win_amd64.whl", hash = "sha256:a451dba533be77454ebcffc85189108fc05f279100835ac76e7989edacb89156"}, + {file = "rpds_py-0.22.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:2ea23f1525d4f64286dbe0947c929d45c3ffe963b2dbed1d3844a2e4938bda42"}, + {file = "rpds_py-0.22.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3aaa22487477de9618ce3b37f99fbe81219ba96f3c2ca84f576f0ab451b83aba"}, + {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8954b9ffe60f479a0c0ba40987db2546c735ab02a725ea7fd89342152d4d821d"}, + {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c8502a02ae3ae67084f5a0bf5a8253b19fa7a887f824e41e016cdb0ac532a06f"}, + {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a083221b6a4ecdef38a60c95d8d3223d99449cb4da2544e9644958dc16664eb9"}, + {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:542eb246d5be31b5e0a9c8ddb9539416f9b31f58f75bd4ee328bff2b5c58d6fd"}, + {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ffae97d28ea4f2c613a751d087b75a97fb78311b38cc2e9a2f4587e473ace167"}, + {file = "rpds_py-0.22.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0ff8d5b13ce2357fa8b33a0a2e3775aa71df5bf7c8ba060634c9d15ab12f357"}, + {file = "rpds_py-0.22.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:0f057a0c546c42964836b209d8de9ea1a4f4b0432006c6343cbe633d8ca14571"}, + {file = "rpds_py-0.22.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:48ee97c7c6027fd423058675b5a39d0b5f7a1648250b671563d5c9f74ff13ff0"}, + {file = "rpds_py-0.22.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:babec324e8654a59122aaa66936a9a483faa03276db9792f51332475c2dddc4a"}, + {file = "rpds_py-0.22.1-cp313-cp313t-win32.whl", hash = "sha256:e69acdbc132c9592c8dc393af85e38e206ca847c7019a953ff625191c3a12312"}, + {file = "rpds_py-0.22.1-cp313-cp313t-win_amd64.whl", hash = "sha256:c783e4ed68200f4e03c125690d23158b1c49c4b186d458a18debc109bbdc3c2e"}, + {file = "rpds_py-0.22.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:2143c3aed85992604d758bbe67da839fb4aab3dd2e1c6dddab5b3ca7162b34a2"}, + {file = "rpds_py-0.22.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f57e2d0f8022783426121b586d7c842ea40ea832a29e28ca36c881b54c74fb28"}, + {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c0c324879d483504b07f7b18eb1b50567c434263bbe4866ecce33056162668a"}, + {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1c40e02cc4f3e18fd39344edb10eebe04bd11cfd13119606b5771e5ea51630d3"}, + {file = 
"rpds_py-0.22.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f76c6f319e57007ad52e671ec741d801324760a377e3d4992c9bb8200333ebac"}, + {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f5cae9b415ea8a6a563566dbf46650222eccc5971c7daa16fbee63aef92ae543"}, + {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b09209cdfcacf5eba9cf80367130532e6c02e695252e1f64d3cfcc2356e6e19f"}, + {file = "rpds_py-0.22.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dbe428d0ac6eacaf05402adbaf137f59ad6063848182d1ff294f95ce0f24005b"}, + {file = "rpds_py-0.22.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:626b9feb01bff049a5aec4804f0c58db12585778b4902e5376a95b01f80a7a16"}, + {file = "rpds_py-0.22.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:ec1ccc2a9f764cd632fb8ab28fdde166250df54fc8d97315a4a6948dc5367639"}, + {file = "rpds_py-0.22.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:ef92b1fbe6aa2e7885eb90853cc016b1fc95439a8cc8da6d526880e9e2148695"}, + {file = "rpds_py-0.22.1-cp39-cp39-win32.whl", hash = "sha256:c88535f83f7391cf3a45af990237e3939a6fdfbedaed2571633bfdd0bceb36b0"}, + {file = "rpds_py-0.22.1-cp39-cp39-win_amd64.whl", hash = "sha256:7839b7528faa4d134c183b1f2dd1ee4dc2ca2f899f4f0cfdf00fc04c255262a7"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a0ed14a4162c2c2b21a162c9fcf90057e3e7da18cd171ab344c1e1664f75090e"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:05fdeae9010533e47715c37df83264df0122584e40d691d50cf3607c060952a3"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4659b2e4a5008715099e216050f5c6976e5a4329482664411789968b82e3f17d"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a18aedc032d6468b73ebbe4437129cb30d54fe543cde2f23671ecad76c3aea24"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:149b4d875ef9b12a8f5e303e86a32a58f8ef627e57ec97a7d0e4be819069d141"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fdaee3947eaaa52dae3ceb9d9f66329e13d8bae35682b1e5dd54612938693934"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:36ce951800ed2acc6772fd9f42150f29d567f0423989748052fdb39d9e2b5795"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ab784621d3e2a41916e21f13a483602cc989fd45fff637634b9231ba43d4383b"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:c2a214bf5b79bd39a9de1c991353aaaacafda83ba1374178309e92be8e67d411"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:85060e96953647871957d41707adb8d7bff4e977042fd0deb4fc1881b98dd2fe"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:c6f3fd617db422c9d4e12cb8d84c984fe07d6d9cb0950cbf117f3bccc6268d05"}, + {file = "rpds_py-0.22.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:f2d1b58a0c3a73f0361759642e80260a6d28eee6501b40fe25b82af33ef83f21"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:76eaa4c087a061a2c8a0a92536405069878a8f530c00e84a9eaf332e70f5561f"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:959ae04ed30cde606f3a0320f0a1f4167a107e685ef5209cce28c5080590bd31"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:198067aa6f3d942ff5d0d655bb1e91b59ae85279d47590682cba2834ac1b97d2"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3e7e99e2af59c56c59b6c964d612511b8203480d39d1ef83edc56f2cb42a3f5d"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0545928bdf53dfdfcab284468212efefb8a6608ca3b6910c7fb2e5ed8bdc2dc0"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef7282d8a14b60dd515e47060638687710b1d518f4b5e961caad43fb3a3606f9"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe3f245c2f39a5692d9123c174bc48f6f9fe3e96407e67c6d04541a767d99e72"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:efb2ad60ca8637d5f9f653f9a9a8d73964059972b6b95036be77e028bffc68a3"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:d8306f27418361b788e3fca9f47dec125457f80122e7e31ba7ff5cdba98343f8"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:4c8dc7331e8cbb1c0ea2bcb550adb1777365944ffd125c69aa1117fdef4887f5"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:776a06cb5720556a549829896a49acebb5bdd96c7bba100191a994053546975a"}, + {file = "rpds_py-0.22.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:e4f91d702b9ce1388660b3d4a28aa552614a1399e93f718ed0dacd68f23b3d32"}, + {file = "rpds_py-0.22.1.tar.gz", hash = "sha256:157a023bded0618a1eea54979fe2e0f9309e9ddc818ef4b8fc3b884ff38fedd5"}, ] [[package]] name = "s3transfer" -version = "0.10.3" +version = "0.10.4" description = "An Amazon S3 Transfer Manager" optional = true python-versions = ">=3.8" files = [ - {file = "s3transfer-0.10.3-py3-none-any.whl", hash = "sha256:263ed587a5803c6c708d3ce44dc4dfedaab4c1a32e8329bab818933d79ddcf5d"}, - {file = "s3transfer-0.10.3.tar.gz", hash = "sha256:4f50ed74ab84d474ce614475e0b8d5047ff080810aac5d01ea25231cfc944b0c"}, + {file = "s3transfer-0.10.4-py3-none-any.whl", hash = "sha256:244a76a24355363a68164241438de1b72f8781664920260c48465896b712a41e"}, + {file = "s3transfer-0.10.4.tar.gz", hash = "sha256:29edc09801743c21eb5ecbc617a152df41d3c287f67b615f73e5f750583666a7"}, ] [package.dependencies] @@ -4742,11 +4664,6 @@ files = [ {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, - {file = 
"scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, - {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, @@ -4850,23 +4767,23 @@ starkbank-ecdsa = ">=2.0.1" [[package]] name = "setuptools" -version = "75.3.0" +version = "75.6.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, - {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, + {file = "setuptools-75.6.0-py3-none-any.whl", hash = "sha256:ce74b49e8f7110f9bf04883b730f4765b774ef3ef28f722cce7c273d253aaf7d"}, + {file = "setuptools-75.6.0.tar.gz", hash = "sha256:8199222558df7c86216af4f84c30e9b34a61d8ba19366cc914424cdbd28252f6"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.7.0)"] +core = ["importlib_metadata (>=6)", "jaraco.collections", "jaraco.functools (>=4)", "jaraco.text (>=3.7)", "more_itertools", "more_itertools (>=8.8)", "packaging", "packaging (>=24.2)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=24.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", 
"pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib_metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (>=1.12,<1.14)", "pytest-mypy"] [[package]] name = "six" @@ -5218,7 +5135,6 @@ files = [ {file = "tiktoken-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d8c2d0e5ba6453a290b86cd65fc51fedf247e1ba170191715b049dac1f628005"}, {file = "tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1"}, {file = "tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a"}, - {file = "tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d"}, {file = "tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47"}, {file = "tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419"}, {file = "tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99"}, @@ -5380,31 +5296,61 @@ files = [ [[package]] name = "tomli" -version = "2.0.2" +version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" files = [ - {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, - {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, + {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, + {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, + {file = 
"tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, + {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, + {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, + {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, + {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, + {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, + {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] [[package]] name = "tqdm" -version = "4.67.0" +version = "4.67.1" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"}, - {file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"}, + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = 
"sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, ] [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} [package.extras] -dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] +dev = ["nbval", "pytest (>=6)", "pytest-asyncio (>=0.24)", "pytest-cov", "pytest-timeout"] discord = ["requests"] notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] @@ -5620,13 +5566,13 @@ text-embedding = ["sentence-transformers (==2.*)"] [[package]] name = "virtualenv" -version = "20.27.1" +version = "20.28.0" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.8" files = [ - {file = "virtualenv-20.27.1-py3-none-any.whl", hash = "sha256:f11f1b8a29525562925f745563bfd48b189450f61fb34c4f9cc79dd5aa32a1f4"}, - {file = "virtualenv-20.27.1.tar.gz", hash = "sha256:142c6be10212543b32c6c45d3d3893dff89112cc588b7d0879ae5a1ec03a47ba"}, + {file = "virtualenv-20.28.0-py3-none-any.whl", hash = "sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0"}, + {file = "virtualenv-20.28.0.tar.gz", hash = "sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa"}, ] [package.dependencies] @@ -5749,81 +5695,76 @@ dev = ["black (>=19.3b0)", "pytest (>=4.6.2)"] [[package]] name = "wrapt" -version = "1.16.0" +version = "1.17.0" description = "Module for decorators, wrappers and monkey patching." optional = true -python-versions = ">=3.6" +python-versions = ">=3.8" files = [ - {file = "wrapt-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ffa565331890b90056c01db69c0fe634a776f8019c143a5ae265f9c6bc4bd6d4"}, - {file = "wrapt-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e4fdb9275308292e880dcbeb12546df7f3e0f96c6b41197e0cf37d2826359020"}, - {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb2dee3874a500de01c93d5c71415fcaef1d858370d405824783e7a8ef5db440"}, - {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2a88e6010048489cda82b1326889ec075a8c856c2e6a256072b28eaee3ccf487"}, - {file = "wrapt-1.16.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac83a914ebaf589b69f7d0a1277602ff494e21f4c2f743313414378f8f50a4cf"}, - {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:73aa7d98215d39b8455f103de64391cb79dfcad601701a3aa0dddacf74911d72"}, - {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:807cc8543a477ab7422f1120a217054f958a66ef7314f76dd9e77d3f02cdccd0"}, - {file = "wrapt-1.16.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bf5703fdeb350e36885f2875d853ce13172ae281c56e509f4e6eca049bdfb136"}, - {file = "wrapt-1.16.0-cp310-cp310-win32.whl", hash = "sha256:f6b2d0c6703c988d334f297aa5df18c45e97b0af3679bb75059e0e0bd8b1069d"}, - {file = "wrapt-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:decbfa2f618fa8ed81c95ee18a387ff973143c656ef800c9f24fb7e9c16054e2"}, - {file = "wrapt-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1a5db485fe2de4403f13fafdc231b0dbae5eca4359232d2efc79025527375b09"}, - {file = "wrapt-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:75ea7d0ee2a15733684badb16de6794894ed9c55aa5e9903260922f0482e687d"}, - {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a452f9ca3e3267cd4d0fcf2edd0d035b1934ac2bd7e0e57ac91ad6b95c0c6389"}, - {file = 
"wrapt-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:43aa59eadec7890d9958748db829df269f0368521ba6dc68cc172d5d03ed8060"}, - {file = "wrapt-1.16.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:72554a23c78a8e7aa02abbd699d129eead8b147a23c56e08d08dfc29cfdddca1"}, - {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d2efee35b4b0a347e0d99d28e884dfd82797852d62fcd7ebdeee26f3ceb72cf3"}, - {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:6dcfcffe73710be01d90cae08c3e548d90932d37b39ef83969ae135d36ef3956"}, - {file = "wrapt-1.16.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:eb6e651000a19c96f452c85132811d25e9264d836951022d6e81df2fff38337d"}, - {file = "wrapt-1.16.0-cp311-cp311-win32.whl", hash = "sha256:66027d667efe95cc4fa945af59f92c5a02c6f5bb6012bff9e60542c74c75c362"}, - {file = "wrapt-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:aefbc4cb0a54f91af643660a0a150ce2c090d3652cf4052a5397fb2de549cd89"}, - {file = "wrapt-1.16.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:5eb404d89131ec9b4f748fa5cfb5346802e5ee8836f57d516576e61f304f3b7b"}, - {file = "wrapt-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:9090c9e676d5236a6948330e83cb89969f433b1943a558968f659ead07cb3b36"}, - {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:94265b00870aa407bd0cbcfd536f17ecde43b94fb8d228560a1e9d3041462d73"}, - {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2058f813d4f2b5e3a9eb2eb3faf8f1d99b81c3e51aeda4b168406443e8ba809"}, - {file = "wrapt-1.16.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98b5e1f498a8ca1858a1cdbffb023bfd954da4e3fa2c0cb5853d40014557248b"}, - {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14d7dc606219cdd7405133c713f2c218d4252f2a469003f8c46bb92d5d095d81"}, - {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:49aac49dc4782cb04f58986e81ea0b4768e4ff197b57324dcbd7699c5dfb40b9"}, - {file = "wrapt-1.16.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:418abb18146475c310d7a6dc71143d6f7adec5b004ac9ce08dc7a34e2babdc5c"}, - {file = "wrapt-1.16.0-cp312-cp312-win32.whl", hash = "sha256:685f568fa5e627e93f3b52fda002c7ed2fa1800b50ce51f6ed1d572d8ab3e7fc"}, - {file = "wrapt-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:dcdba5c86e368442528f7060039eda390cc4091bfd1dca41e8046af7c910dda8"}, - {file = "wrapt-1.16.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:d462f28826f4657968ae51d2181a074dfe03c200d6131690b7d65d55b0f360f8"}, - {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a33a747400b94b6d6b8a165e4480264a64a78c8a4c734b62136062e9a248dd39"}, - {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b3646eefa23daeba62643a58aac816945cadc0afaf21800a1421eeba5f6cfb9c"}, - {file = "wrapt-1.16.0-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ebf019be5c09d400cf7b024aa52b1f3aeebeff51550d007e92c3c1c4afc2a40"}, - {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:0d2691979e93d06a95a26257adb7bfd0c93818e89b1406f5a28f36e0d8c1e1fc"}, - {file = 
"wrapt-1.16.0-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:1acd723ee2a8826f3d53910255643e33673e1d11db84ce5880675954183ec47e"}, - {file = "wrapt-1.16.0-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:bc57efac2da352a51cc4658878a68d2b1b67dbe9d33c36cb826ca449d80a8465"}, - {file = "wrapt-1.16.0-cp36-cp36m-win32.whl", hash = "sha256:da4813f751142436b075ed7aa012a8778aa43a99f7b36afe9b742d3ed8bdc95e"}, - {file = "wrapt-1.16.0-cp36-cp36m-win_amd64.whl", hash = "sha256:6f6eac2360f2d543cc875a0e5efd413b6cbd483cb3ad7ebf888884a6e0d2e966"}, - {file = "wrapt-1.16.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a0ea261ce52b5952bf669684a251a66df239ec6d441ccb59ec7afa882265d593"}, - {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7bd2d7ff69a2cac767fbf7a2b206add2e9a210e57947dd7ce03e25d03d2de292"}, - {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9159485323798c8dc530a224bd3ffcf76659319ccc7bbd52e01e73bd0241a0c5"}, - {file = "wrapt-1.16.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a86373cf37cd7764f2201b76496aba58a52e76dedfaa698ef9e9688bfd9e41cf"}, - {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:73870c364c11f03ed072dda68ff7aea6d2a3a5c3fe250d917a429c7432e15228"}, - {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b935ae30c6e7400022b50f8d359c03ed233d45b725cfdd299462f41ee5ffba6f"}, - {file = "wrapt-1.16.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:db98ad84a55eb09b3c32a96c576476777e87c520a34e2519d3e59c44710c002c"}, - {file = "wrapt-1.16.0-cp37-cp37m-win32.whl", hash = "sha256:9153ed35fc5e4fa3b2fe97bddaa7cbec0ed22412b85bcdaf54aeba92ea37428c"}, - {file = "wrapt-1.16.0-cp37-cp37m-win_amd64.whl", hash = "sha256:66dfbaa7cfa3eb707bbfcd46dab2bc6207b005cbc9caa2199bcbc81d95071a00"}, - {file = "wrapt-1.16.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1dd50a2696ff89f57bd8847647a1c363b687d3d796dc30d4dd4a9d1689a706f0"}, - {file = "wrapt-1.16.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:44a2754372e32ab315734c6c73b24351d06e77ffff6ae27d2ecf14cf3d229202"}, - {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e9723528b9f787dc59168369e42ae1c3b0d3fadb2f1a71de14531d321ee05b0"}, - {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:dbed418ba5c3dce92619656802cc5355cb679e58d0d89b50f116e4a9d5a9603e"}, - {file = "wrapt-1.16.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:941988b89b4fd6b41c3f0bfb20e92bd23746579736b7343283297c4c8cbae68f"}, - {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:6a42cd0cfa8ffc1915aef79cb4284f6383d8a3e9dcca70c445dcfdd639d51267"}, - {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:1ca9b6085e4f866bd584fb135a041bfc32cab916e69f714a7d1d397f8c4891ca"}, - {file = "wrapt-1.16.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d5e49454f19ef621089e204f862388d29e6e8d8b162efce05208913dde5b9ad6"}, - {file = "wrapt-1.16.0-cp38-cp38-win32.whl", hash = "sha256:c31f72b1b6624c9d863fc095da460802f43a7c6868c5dda140f51da24fd47d7b"}, - {file = "wrapt-1.16.0-cp38-cp38-win_amd64.whl", hash = "sha256:490b0ee15c1a55be9c1bd8609b8cecd60e325f0575fc98f50058eae366e01f41"}, - {file = 
"wrapt-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9b201ae332c3637a42f02d1045e1d0cccfdc41f1f2f801dafbaa7e9b4797bfc2"}, - {file = "wrapt-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2076fad65c6736184e77d7d4729b63a6d1ae0b70da4868adeec40989858eb3fb"}, - {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5cd603b575ebceca7da5a3a251e69561bec509e0b46e4993e1cac402b7247b8"}, - {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b47cfad9e9bbbed2339081f4e346c93ecd7ab504299403320bf85f7f85c7d46c"}, - {file = "wrapt-1.16.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f8212564d49c50eb4565e502814f694e240c55551a5f1bc841d4fcaabb0a9b8a"}, - {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:5f15814a33e42b04e3de432e573aa557f9f0f56458745c2074952f564c50e664"}, - {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:db2e408d983b0e61e238cf579c09ef7020560441906ca990fe8412153e3b291f"}, - {file = "wrapt-1.16.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:edfad1d29c73f9b863ebe7082ae9321374ccb10879eeabc84ba3b69f2579d537"}, - {file = "wrapt-1.16.0-cp39-cp39-win32.whl", hash = "sha256:ed867c42c268f876097248e05b6117a65bcd1e63b779e916fe2e33cd6fd0d3c3"}, - {file = "wrapt-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:eb1b046be06b0fce7249f1d025cd359b4b80fc1c3e24ad9eca33e0dcdb2e4a35"}, - {file = "wrapt-1.16.0-py3-none-any.whl", hash = "sha256:6906c4100a8fcbf2fa735f6059214bb13b97f75b1a61777fcf6432121ef12ef1"}, - {file = "wrapt-1.16.0.tar.gz", hash = "sha256:5f370f952971e7d17c7d1ead40e49f32345a7f7a5373571ef44d800d06b1899d"}, + {file = "wrapt-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2a0c23b8319848426f305f9cb0c98a6e32ee68a36264f45948ccf8e7d2b941f8"}, + {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1ca5f060e205f72bec57faae5bd817a1560fcfc4af03f414b08fa29106b7e2d"}, + {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e185ec6060e301a7e5f8461c86fb3640a7beb1a0f0208ffde7a65ec4074931df"}, + {file = "wrapt-1.17.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bb90765dd91aed05b53cd7a87bd7f5c188fcd95960914bae0d32c5e7f899719d"}, + {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:879591c2b5ab0a7184258274c42a126b74a2c3d5a329df16d69f9cee07bba6ea"}, + {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fce6fee67c318fdfb7f285c29a82d84782ae2579c0e1b385b7f36c6e8074fffb"}, + {file = "wrapt-1.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0698d3a86f68abc894d537887b9bbf84d29bcfbc759e23f4644be27acf6da301"}, + {file = "wrapt-1.17.0-cp310-cp310-win32.whl", hash = "sha256:69d093792dc34a9c4c8a70e4973a3361c7a7578e9cd86961b2bbf38ca71e4e22"}, + {file = "wrapt-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:f28b29dc158ca5d6ac396c8e0a2ef45c4e97bb7e65522bfc04c989e6fe814575"}, + {file = "wrapt-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:74bf625b1b4caaa7bad51d9003f8b07a468a704e0644a700e936c357c17dd45a"}, + {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0f2a28eb35cf99d5f5bd12f5dd44a0f41d206db226535b37b0c60e9da162c3ed"}, + {file = 
"wrapt-1.17.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:81b1289e99cf4bad07c23393ab447e5e96db0ab50974a280f7954b071d41b489"}, + {file = "wrapt-1.17.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9f2939cd4a2a52ca32bc0b359015718472d7f6de870760342e7ba295be9ebaf9"}, + {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:6a9653131bda68a1f029c52157fd81e11f07d485df55410401f745007bd6d339"}, + {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:4e4b4385363de9052dac1a67bfb535c376f3d19c238b5f36bddc95efae15e12d"}, + {file = "wrapt-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bdf62d25234290db1837875d4dceb2151e4ea7f9fff2ed41c0fde23ed542eb5b"}, + {file = "wrapt-1.17.0-cp311-cp311-win32.whl", hash = "sha256:5d8fd17635b262448ab8f99230fe4dac991af1dabdbb92f7a70a6afac8a7e346"}, + {file = "wrapt-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:92a3d214d5e53cb1db8b015f30d544bc9d3f7179a05feb8f16df713cecc2620a"}, + {file = "wrapt-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:89fc28495896097622c3fc238915c79365dd0ede02f9a82ce436b13bd0ab7569"}, + {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:875d240fdbdbe9e11f9831901fb8719da0bd4e6131f83aa9f69b96d18fae7504"}, + {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5ed16d95fd142e9c72b6c10b06514ad30e846a0d0917ab406186541fe68b451"}, + {file = "wrapt-1.17.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:18b956061b8db634120b58f668592a772e87e2e78bc1f6a906cfcaa0cc7991c1"}, + {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:daba396199399ccabafbfc509037ac635a6bc18510ad1add8fd16d4739cdd106"}, + {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4d63f4d446e10ad19ed01188d6c1e1bb134cde8c18b0aa2acfd973d41fcc5ada"}, + {file = "wrapt-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8a5e7cc39a45fc430af1aefc4d77ee6bad72c5bcdb1322cfde852c15192b8bd4"}, + {file = "wrapt-1.17.0-cp312-cp312-win32.whl", hash = "sha256:0a0a1a1ec28b641f2a3a2c35cbe86c00051c04fffcfcc577ffcdd707df3f8635"}, + {file = "wrapt-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:3c34f6896a01b84bab196f7119770fd8466c8ae3dfa73c59c0bb281e7b588ce7"}, + {file = "wrapt-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:714c12485aa52efbc0fc0ade1e9ab3a70343db82627f90f2ecbc898fdf0bb181"}, + {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:da427d311782324a376cacb47c1a4adc43f99fd9d996ffc1b3e8529c4074d393"}, + {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba1739fb38441a27a676f4de4123d3e858e494fac05868b7a281c0a383c098f4"}, + {file = "wrapt-1.17.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e711fc1acc7468463bc084d1b68561e40d1eaa135d8c509a65dd534403d83d7b"}, + {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:140ea00c87fafc42739bd74a94a5a9003f8e72c27c47cd4f61d8e05e6dec8721"}, + {file = "wrapt-1.17.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:73a96fd11d2b2e77d623a7f26e004cc31f131a365add1ce1ce9a19e55a1eef90"}, + {file = 
"wrapt-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0b48554952f0f387984da81ccfa73b62e52817a4386d070c75e4db7d43a28c4a"}, + {file = "wrapt-1.17.0-cp313-cp313-win32.whl", hash = "sha256:498fec8da10e3e62edd1e7368f4b24aa362ac0ad931e678332d1b209aec93045"}, + {file = "wrapt-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:fd136bb85f4568fffca995bd3c8d52080b1e5b225dbf1c2b17b66b4c5fa02838"}, + {file = "wrapt-1.17.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:17fcf043d0b4724858f25b8826c36e08f9fb2e475410bece0ec44a22d533da9b"}, + {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4a557d97f12813dc5e18dad9fa765ae44ddd56a672bb5de4825527c847d6379"}, + {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0229b247b0fc7dee0d36176cbb79dbaf2a9eb7ecc50ec3121f40ef443155fb1d"}, + {file = "wrapt-1.17.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8425cfce27b8b20c9b89d77fb50e368d8306a90bf2b6eef2cdf5cd5083adf83f"}, + {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9c900108df470060174108012de06d45f514aa4ec21a191e7ab42988ff42a86c"}, + {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:4e547b447073fc0dbfcbff15154c1be8823d10dab4ad401bdb1575e3fdedff1b"}, + {file = "wrapt-1.17.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:914f66f3b6fc7b915d46c1cc424bc2441841083de01b90f9e81109c9759e43ab"}, + {file = "wrapt-1.17.0-cp313-cp313t-win32.whl", hash = "sha256:a4192b45dff127c7d69b3bdfb4d3e47b64179a0b9900b6351859f3001397dabf"}, + {file = "wrapt-1.17.0-cp313-cp313t-win_amd64.whl", hash = "sha256:4f643df3d4419ea3f856c5c3f40fec1d65ea2e89ec812c83f7767c8730f9827a"}, + {file = "wrapt-1.17.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:69c40d4655e078ede067a7095544bcec5a963566e17503e75a3a3e0fe2803b13"}, + {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f495b6754358979379f84534f8dd7a43ff8cff2558dcdea4a148a6e713a758f"}, + {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:baa7ef4e0886a6f482e00d1d5bcd37c201b383f1d314643dfb0367169f94f04c"}, + {file = "wrapt-1.17.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a8fc931382e56627ec4acb01e09ce66e5c03c384ca52606111cee50d931a342d"}, + {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:8f8909cdb9f1b237786c09a810e24ee5e15ef17019f7cecb207ce205b9b5fcce"}, + {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ad47b095f0bdc5585bced35bd088cbfe4177236c7df9984b3cc46b391cc60627"}, + {file = "wrapt-1.17.0-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:948a9bd0fb2c5120457b07e59c8d7210cbc8703243225dbd78f4dfc13c8d2d1f"}, + {file = "wrapt-1.17.0-cp38-cp38-win32.whl", hash = "sha256:5ae271862b2142f4bc687bdbfcc942e2473a89999a54231aa1c2c676e28f29ea"}, + {file = "wrapt-1.17.0-cp38-cp38-win_amd64.whl", hash = "sha256:f335579a1b485c834849e9075191c9898e0731af45705c2ebf70e0cd5d58beed"}, + {file = "wrapt-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d751300b94e35b6016d4b1e7d0e7bbc3b5e1751e2405ef908316c2a9024008a1"}, + {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7264cbb4a18dc4acfd73b63e4bcfec9c9802614572025bdd44d0721983fc1d9c"}, + {file 
= "wrapt-1.17.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33539c6f5b96cf0b1105a0ff4cf5db9332e773bb521cc804a90e58dc49b10578"}, + {file = "wrapt-1.17.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c30970bdee1cad6a8da2044febd824ef6dc4cc0b19e39af3085c763fdec7de33"}, + {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:bc7f729a72b16ee21795a943f85c6244971724819819a41ddbaeb691b2dd85ad"}, + {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:6ff02a91c4fc9b6a94e1c9c20f62ea06a7e375f42fe57587f004d1078ac86ca9"}, + {file = "wrapt-1.17.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:2dfb7cff84e72e7bf975b06b4989477873dcf160b2fd89959c629535df53d4e0"}, + {file = "wrapt-1.17.0-cp39-cp39-win32.whl", hash = "sha256:2399408ac33ffd5b200480ee858baa58d77dd30e0dd0cab6a8a9547135f30a88"}, + {file = "wrapt-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:4f763a29ee6a20c529496a20a7bcb16a73de27f5da6a843249c7047daf135977"}, + {file = "wrapt-1.17.0-py3-none-any.whl", hash = "sha256:d2c63b93548eda58abf5188e505ffed0229bf675f7c3090f8e36ad55b8cbc371"}, + {file = "wrapt-1.17.0.tar.gz", hash = "sha256:16187aa2317c731170a88ef35e8937ae0f533c402872c1ee5e6d079fcf320801"}, ] [[package]] @@ -5839,93 +5780,93 @@ files = [ [[package]] name = "yarl" -version = "1.17.1" +version = "1.18.3" description = "Yet another URL library" optional = false python-versions = ">=3.9" files = [ - {file = "yarl-1.17.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:0b1794853124e2f663f0ea54efb0340b457f08d40a1cef78edfa086576179c91"}, - {file = "yarl-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:fbea1751729afe607d84acfd01efd95e3b31db148a181a441984ce9b3d3469da"}, - {file = "yarl-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8ee427208c675f1b6e344a1f89376a9613fc30b52646a04ac0c1f6587c7e46ec"}, - {file = "yarl-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b74ff4767d3ef47ffe0cd1d89379dc4d828d4873e5528976ced3b44fe5b0a21"}, - {file = "yarl-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:62a91aefff3d11bf60e5956d340eb507a983a7ec802b19072bb989ce120cd948"}, - {file = "yarl-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:846dd2e1243407133d3195d2d7e4ceefcaa5f5bf7278f0a9bda00967e6326b04"}, - {file = "yarl-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3e844be8d536afa129366d9af76ed7cb8dfefec99f5f1c9e4f8ae542279a6dc3"}, - {file = "yarl-1.17.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cc7c92c1baa629cb03ecb0c3d12564f172218fb1739f54bf5f3881844daadc6d"}, - {file = "yarl-1.17.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ae3476e934b9d714aa8000d2e4c01eb2590eee10b9d8cd03e7983ad65dfbfcba"}, - {file = "yarl-1.17.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:c7e177c619342e407415d4f35dec63d2d134d951e24b5166afcdfd1362828e17"}, - {file = "yarl-1.17.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:64cc6e97f14cf8a275d79c5002281f3040c12e2e4220623b5759ea7f9868d6a5"}, - {file = "yarl-1.17.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:84c063af19ef5130084db70ada40ce63a84f6c1ef4d3dbc34e5e8c4febb20822"}, - {file = "yarl-1.17.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = 
"sha256:482c122b72e3c5ec98f11457aeb436ae4aecca75de19b3d1de7cf88bc40db82f"}, - {file = "yarl-1.17.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:380e6c38ef692b8fd5a0f6d1fa8774d81ebc08cfbd624b1bca62a4d4af2f9931"}, - {file = "yarl-1.17.1-cp310-cp310-win32.whl", hash = "sha256:16bca6678a83657dd48df84b51bd56a6c6bd401853aef6d09dc2506a78484c7b"}, - {file = "yarl-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:561c87fea99545ef7d692403c110b2f99dced6dff93056d6e04384ad3bc46243"}, - {file = "yarl-1.17.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cbad927ea8ed814622305d842c93412cb47bd39a496ed0f96bfd42b922b4a217"}, - {file = "yarl-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:fca4b4307ebe9c3ec77a084da3a9d1999d164693d16492ca2b64594340999988"}, - {file = "yarl-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ff5c6771c7e3511a06555afa317879b7db8d640137ba55d6ab0d0c50425cab75"}, - {file = "yarl-1.17.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b29beab10211a746f9846baa39275e80034e065460d99eb51e45c9a9495bcca"}, - {file = "yarl-1.17.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1a52a1ffdd824fb1835272e125385c32fd8b17fbdefeedcb4d543cc23b332d74"}, - {file = "yarl-1.17.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:58c8e9620eb82a189c6c40cb6b59b4e35b2ee68b1f2afa6597732a2b467d7e8f"}, - {file = "yarl-1.17.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d216e5d9b8749563c7f2c6f7a0831057ec844c68b4c11cb10fc62d4fd373c26d"}, - {file = "yarl-1.17.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:881764d610e3269964fc4bb3c19bb6fce55422828e152b885609ec176b41cf11"}, - {file = "yarl-1.17.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8c79e9d7e3d8a32d4824250a9c6401194fb4c2ad9a0cec8f6a96e09a582c2cc0"}, - {file = "yarl-1.17.1-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:299f11b44d8d3a588234adbe01112126010bd96d9139c3ba7b3badd9829261c3"}, - {file = "yarl-1.17.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:cc7d768260f4ba4ea01741c1b5fe3d3a6c70eb91c87f4c8761bbcce5181beafe"}, - {file = "yarl-1.17.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:de599af166970d6a61accde358ec9ded821234cbbc8c6413acfec06056b8e860"}, - {file = "yarl-1.17.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2b24ec55fad43e476905eceaf14f41f6478780b870eda5d08b4d6de9a60b65b4"}, - {file = "yarl-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9fb815155aac6bfa8d86184079652c9715c812d506b22cfa369196ef4e99d1b4"}, - {file = "yarl-1.17.1-cp311-cp311-win32.whl", hash = "sha256:7615058aabad54416ddac99ade09a5510cf77039a3b903e94e8922f25ed203d7"}, - {file = "yarl-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:14bc88baa44e1f84164a392827b5defb4fa8e56b93fecac3d15315e7c8e5d8b3"}, - {file = "yarl-1.17.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:327828786da2006085a4d1feb2594de6f6d26f8af48b81eb1ae950c788d97f61"}, - {file = "yarl-1.17.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cc353841428d56b683a123a813e6a686e07026d6b1c5757970a877195f880c2d"}, - {file = "yarl-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c73df5b6e8fabe2ddb74876fb82d9dd44cbace0ca12e8861ce9155ad3c886139"}, - {file = "yarl-1.17.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0bdff5e0995522706c53078f531fb586f56de9c4c81c243865dd5c66c132c3b5"}, - {file = 
"yarl-1.17.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:06157fb3c58f2736a5e47c8fcbe1afc8b5de6fb28b14d25574af9e62150fcaac"}, - {file = "yarl-1.17.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1654ec814b18be1af2c857aa9000de7a601400bd4c9ca24629b18486c2e35463"}, - {file = "yarl-1.17.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f6595c852ca544aaeeb32d357e62c9c780eac69dcd34e40cae7b55bc4fb1147"}, - {file = "yarl-1.17.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:459e81c2fb920b5f5df744262d1498ec2c8081acdcfe18181da44c50f51312f7"}, - {file = "yarl-1.17.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7e48cdb8226644e2fbd0bdb0a0f87906a3db07087f4de77a1b1b1ccfd9e93685"}, - {file = "yarl-1.17.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:d9b6b28a57feb51605d6ae5e61a9044a31742db557a3b851a74c13bc61de5172"}, - {file = "yarl-1.17.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e594b22688d5747b06e957f1ef822060cb5cb35b493066e33ceac0cf882188b7"}, - {file = "yarl-1.17.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5f236cb5999ccd23a0ab1bd219cfe0ee3e1c1b65aaf6dd3320e972f7ec3a39da"}, - {file = "yarl-1.17.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a2a64e62c7a0edd07c1c917b0586655f3362d2c2d37d474db1a509efb96fea1c"}, - {file = "yarl-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d0eea830b591dbc68e030c86a9569826145df485b2b4554874b07fea1275a199"}, - {file = "yarl-1.17.1-cp312-cp312-win32.whl", hash = "sha256:46ddf6e0b975cd680eb83318aa1d321cb2bf8d288d50f1754526230fcf59ba96"}, - {file = "yarl-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:117ed8b3732528a1e41af3aa6d4e08483c2f0f2e3d3d7dca7cf538b3516d93df"}, - {file = "yarl-1.17.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:5d1d42556b063d579cae59e37a38c61f4402b47d70c29f0ef15cee1acaa64488"}, - {file = "yarl-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c0167540094838ee9093ef6cc2c69d0074bbf84a432b4995835e8e5a0d984374"}, - {file = "yarl-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2f0a6423295a0d282d00e8701fe763eeefba8037e984ad5de44aa349002562ac"}, - {file = "yarl-1.17.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5b078134f48552c4d9527db2f7da0b5359abd49393cdf9794017baec7506170"}, - {file = "yarl-1.17.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d401f07261dc5aa36c2e4efc308548f6ae943bfff20fcadb0a07517a26b196d8"}, - {file = "yarl-1.17.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b5f1ac7359e17efe0b6e5fec21de34145caef22b260e978336f325d5c84e6938"}, - {file = "yarl-1.17.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f63d176a81555984e91f2c84c2a574a61cab7111cc907e176f0f01538e9ff6e"}, - {file = "yarl-1.17.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9e275792097c9f7e80741c36de3b61917aebecc08a67ae62899b074566ff8556"}, - {file = "yarl-1.17.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:81713b70bea5c1386dc2f32a8f0dab4148a2928c7495c808c541ee0aae614d67"}, - {file = "yarl-1.17.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:aa46dce75078fceaf7cecac5817422febb4355fbdda440db55206e3bd288cfb8"}, - {file = "yarl-1.17.1-cp313-cp313-musllinux_1_2_i686.whl", hash = 
"sha256:1ce36ded585f45b1e9bb36d0ae94765c6608b43bd2e7f5f88079f7a85c61a4d3"}, - {file = "yarl-1.17.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:2d374d70fdc36f5863b84e54775452f68639bc862918602d028f89310a034ab0"}, - {file = "yarl-1.17.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:2d9f0606baaec5dd54cb99667fcf85183a7477f3766fbddbe3f385e7fc253299"}, - {file = "yarl-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b0341e6d9a0c0e3cdc65857ef518bb05b410dbd70d749a0d33ac0f39e81a4258"}, - {file = "yarl-1.17.1-cp313-cp313-win32.whl", hash = "sha256:2e7ba4c9377e48fb7b20dedbd473cbcbc13e72e1826917c185157a137dac9df2"}, - {file = "yarl-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:949681f68e0e3c25377462be4b658500e85ca24323d9619fdc41f68d46a1ffda"}, - {file = "yarl-1.17.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:8994b29c462de9a8fce2d591028b986dbbe1b32f3ad600b2d3e1c482c93abad6"}, - {file = "yarl-1.17.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:f9cbfbc5faca235fbdf531b93aa0f9f005ec7d267d9d738761a4d42b744ea159"}, - {file = "yarl-1.17.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:b40d1bf6e6f74f7c0a567a9e5e778bbd4699d1d3d2c0fe46f4b717eef9e96b95"}, - {file = "yarl-1.17.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f5efe0661b9fcd6246f27957f6ae1c0eb29bc60552820f01e970b4996e016004"}, - {file = "yarl-1.17.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b5c4804e4039f487e942c13381e6c27b4b4e66066d94ef1fae3f6ba8b953f383"}, - {file = "yarl-1.17.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b5d6a6c9602fd4598fa07e0389e19fe199ae96449008d8304bf5d47cb745462e"}, - {file = "yarl-1.17.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f4c9156c4d1eb490fe374fb294deeb7bc7eaccda50e23775b2354b6a6739934"}, - {file = "yarl-1.17.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6324274b4e0e2fa1b3eccb25997b1c9ed134ff61d296448ab8269f5ac068c4c"}, - {file = "yarl-1.17.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:d8a8b74d843c2638f3864a17d97a4acda58e40d3e44b6303b8cc3d3c44ae2d29"}, - {file = "yarl-1.17.1-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:7fac95714b09da9278a0b52e492466f773cfe37651cf467a83a1b659be24bf71"}, - {file = "yarl-1.17.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:c180ac742a083e109c1a18151f4dd8675f32679985a1c750d2ff806796165b55"}, - {file = "yarl-1.17.1-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:578d00c9b7fccfa1745a44f4eddfdc99d723d157dad26764538fbdda37209857"}, - {file = "yarl-1.17.1-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:1a3b91c44efa29e6c8ef8a9a2b583347998e2ba52c5d8280dbd5919c02dfc3b5"}, - {file = "yarl-1.17.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:a7ac5b4984c468ce4f4a553df281450df0a34aefae02e58d77a0847be8d1e11f"}, - {file = "yarl-1.17.1-cp39-cp39-win32.whl", hash = "sha256:7294e38f9aa2e9f05f765b28ffdc5d81378508ce6dadbe93f6d464a8c9594473"}, - {file = "yarl-1.17.1-cp39-cp39-win_amd64.whl", hash = "sha256:eb6dce402734575e1a8cc0bb1509afca508a400a57ce13d306ea2c663bad1138"}, - {file = "yarl-1.17.1-py3-none-any.whl", hash = "sha256:f1790a4b1e8e8e028c391175433b9c8122c39b46e1663228158e61e6f915bf06"}, - {file = "yarl-1.17.1.tar.gz", hash = "sha256:067a63fcfda82da6b198fa73079b1ca40b7c9b7994995b6ee38acda728b64d47"}, + {file = "yarl-1.18.3-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:7df647e8edd71f000a5208fe6ff8c382a1de8edfbccdbbfe649d263de07d8c34"}, + {file = "yarl-1.18.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c69697d3adff5aa4f874b19c0e4ed65180ceed6318ec856ebc423aa5850d84f7"}, + {file = "yarl-1.18.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:602d98f2c2d929f8e697ed274fbadc09902c4025c5a9963bf4e9edfc3ab6f7ed"}, + {file = "yarl-1.18.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c654d5207c78e0bd6d749f6dae1dcbbfde3403ad3a4b11f3c5544d9906969dde"}, + {file = "yarl-1.18.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5094d9206c64181d0f6e76ebd8fb2f8fe274950a63890ee9e0ebfd58bf9d787b"}, + {file = "yarl-1.18.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:35098b24e0327fc4ebdc8ffe336cee0a87a700c24ffed13161af80124b7dc8e5"}, + {file = "yarl-1.18.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3236da9272872443f81fedc389bace88408f64f89f75d1bdb2256069a8730ccc"}, + {file = "yarl-1.18.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e2c08cc9b16f4f4bc522771d96734c7901e7ebef70c6c5c35dd0f10845270bcd"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:80316a8bd5109320d38eef8833ccf5f89608c9107d02d2a7f985f98ed6876990"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:c1e1cc06da1491e6734f0ea1e6294ce00792193c463350626571c287c9a704db"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:fea09ca13323376a2fdfb353a5fa2e59f90cd18d7ca4eaa1fd31f0a8b4f91e62"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e3b9fd71836999aad54084906f8663dffcd2a7fb5cdafd6c37713b2e72be1760"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:757e81cae69244257d125ff31663249b3013b5dc0a8520d73694aed497fb195b"}, + {file = "yarl-1.18.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b1771de9944d875f1b98a745bc547e684b863abf8f8287da8466cf470ef52690"}, + {file = "yarl-1.18.3-cp310-cp310-win32.whl", hash = "sha256:8874027a53e3aea659a6d62751800cf6e63314c160fd607489ba5c2edd753cf6"}, + {file = "yarl-1.18.3-cp310-cp310-win_amd64.whl", hash = "sha256:93b2e109287f93db79210f86deb6b9bbb81ac32fc97236b16f7433db7fc437d8"}, + {file = "yarl-1.18.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8503ad47387b8ebd39cbbbdf0bf113e17330ffd339ba1144074da24c545f0069"}, + {file = "yarl-1.18.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:02ddb6756f8f4517a2d5e99d8b2f272488e18dd0bfbc802f31c16c6c20f22193"}, + {file = "yarl-1.18.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:67a283dd2882ac98cc6318384f565bffc751ab564605959df4752d42483ad889"}, + {file = "yarl-1.18.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d980e0325b6eddc81331d3f4551e2a333999fb176fd153e075c6d1c2530aa8a8"}, + {file = "yarl-1.18.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b643562c12680b01e17239be267bc306bbc6aac1f34f6444d1bded0c5ce438ca"}, + {file = "yarl-1.18.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c017a3b6df3a1bd45b9fa49a0f54005e53fbcad16633870104b66fa1a30a29d8"}, + {file = "yarl-1.18.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75674776d96d7b851b6498f17824ba17849d790a44d282929c42dbb77d4f17ae"}, + {file = 
"yarl-1.18.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ccaa3a4b521b780a7e771cc336a2dba389a0861592bbce09a476190bb0c8b4b3"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:2d06d3005e668744e11ed80812e61efd77d70bb7f03e33c1598c301eea20efbb"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:9d41beda9dc97ca9ab0b9888cb71f7539124bc05df02c0cff6e5acc5a19dcc6e"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ba23302c0c61a9999784e73809427c9dbedd79f66a13d84ad1b1943802eaaf59"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:6748dbf9bfa5ba1afcc7556b71cda0d7ce5f24768043a02a58846e4a443d808d"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:0b0cad37311123211dc91eadcb322ef4d4a66008d3e1bdc404808992260e1a0e"}, + {file = "yarl-1.18.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:0fb2171a4486bb075316ee754c6d8382ea6eb8b399d4ec62fde2b591f879778a"}, + {file = "yarl-1.18.3-cp311-cp311-win32.whl", hash = "sha256:61b1a825a13bef4a5f10b1885245377d3cd0bf87cba068e1d9a88c2ae36880e1"}, + {file = "yarl-1.18.3-cp311-cp311-win_amd64.whl", hash = "sha256:b9d60031cf568c627d028239693fd718025719c02c9f55df0a53e587aab951b5"}, + {file = "yarl-1.18.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1dd4bdd05407ced96fed3d7f25dbbf88d2ffb045a0db60dbc247f5b3c5c25d50"}, + {file = "yarl-1.18.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7c33dd1931a95e5d9a772d0ac5e44cac8957eaf58e3c8da8c1414de7dd27c576"}, + {file = "yarl-1.18.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25b411eddcfd56a2f0cd6a384e9f4f7aa3efee14b188de13048c25b5e91f1640"}, + {file = "yarl-1.18.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:436c4fc0a4d66b2badc6c5fc5ef4e47bb10e4fd9bf0c79524ac719a01f3607c2"}, + {file = "yarl-1.18.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e35ef8683211db69ffe129a25d5634319a677570ab6b2eba4afa860f54eeaf75"}, + {file = "yarl-1.18.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:84b2deecba4a3f1a398df819151eb72d29bfeb3b69abb145a00ddc8d30094512"}, + {file = "yarl-1.18.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00e5a1fea0fd4f5bfa7440a47eff01d9822a65b4488f7cff83155a0f31a2ecba"}, + {file = "yarl-1.18.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d0e883008013c0e4aef84dcfe2a0b172c4d23c2669412cf5b3371003941f72bb"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5a3f356548e34a70b0172d8890006c37be92995f62d95a07b4a42e90fba54272"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:ccd17349166b1bee6e529b4add61727d3f55edb7babbe4069b5764c9587a8cc6"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b958ddd075ddba5b09bb0be8a6d9906d2ce933aee81100db289badbeb966f54e"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c7d79f7d9aabd6011004e33b22bc13056a3e3fb54794d138af57f5ee9d9032cb"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4891ed92157e5430874dad17b15eb1fda57627710756c27422200c52d8a4e393"}, + {file = "yarl-1.18.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ce1af883b94304f493698b00d0f006d56aea98aeb49d75ec7d98cd4a777e9285"}, + {file = "yarl-1.18.3-cp312-cp312-win32.whl", hash = 
"sha256:f91c4803173928a25e1a55b943c81f55b8872f0018be83e3ad4938adffb77dd2"}, + {file = "yarl-1.18.3-cp312-cp312-win_amd64.whl", hash = "sha256:7e2ee16578af3b52ac2f334c3b1f92262f47e02cc6193c598502bd46f5cd1477"}, + {file = "yarl-1.18.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:90adb47ad432332d4f0bc28f83a5963f426ce9a1a8809f5e584e704b82685dcb"}, + {file = "yarl-1.18.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:913829534200eb0f789d45349e55203a091f45c37a2674678744ae52fae23efa"}, + {file = "yarl-1.18.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ef9f7768395923c3039055c14334ba4d926f3baf7b776c923c93d80195624782"}, + {file = "yarl-1.18.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88a19f62ff30117e706ebc9090b8ecc79aeb77d0b1f5ec10d2d27a12bc9f66d0"}, + {file = "yarl-1.18.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e17c9361d46a4d5addf777c6dd5eab0715a7684c2f11b88c67ac37edfba6c482"}, + {file = "yarl-1.18.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1a74a13a4c857a84a845505fd2d68e54826a2cd01935a96efb1e9d86c728e186"}, + {file = "yarl-1.18.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:41f7ce59d6ee7741af71d82020346af364949314ed3d87553763a2df1829cc58"}, + {file = "yarl-1.18.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f52a265001d830bc425f82ca9eabda94a64a4d753b07d623a9f2863fde532b53"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:82123d0c954dc58db301f5021a01854a85bf1f3bb7d12ae0c01afc414a882ca2"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:2ec9bbba33b2d00999af4631a3397d1fd78290c48e2a3e52d8dd72db3a067ac8"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:fbd6748e8ab9b41171bb95c6142faf068f5ef1511935a0aa07025438dd9a9bc1"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:877d209b6aebeb5b16c42cbb377f5f94d9e556626b1bfff66d7b0d115be88d0a"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:b464c4ab4bfcb41e3bfd3f1c26600d038376c2de3297760dfe064d2cb7ea8e10"}, + {file = "yarl-1.18.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8d39d351e7faf01483cc7ff7c0213c412e38e5a340238826be7e0e4da450fdc8"}, + {file = "yarl-1.18.3-cp313-cp313-win32.whl", hash = "sha256:61ee62ead9b68b9123ec24bc866cbef297dd266175d53296e2db5e7f797f902d"}, + {file = "yarl-1.18.3-cp313-cp313-win_amd64.whl", hash = "sha256:578e281c393af575879990861823ef19d66e2b1d0098414855dd367e234f5b3c"}, + {file = "yarl-1.18.3-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:61e5e68cb65ac8f547f6b5ef933f510134a6bf31bb178be428994b0cb46c2a04"}, + {file = "yarl-1.18.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:fe57328fbc1bfd0bd0514470ac692630f3901c0ee39052ae47acd1d90a436719"}, + {file = "yarl-1.18.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a440a2a624683108a1b454705ecd7afc1c3438a08e890a1513d468671d90a04e"}, + {file = "yarl-1.18.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09c7907c8548bcd6ab860e5f513e727c53b4a714f459b084f6580b49fa1b9cee"}, + {file = "yarl-1.18.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b4f6450109834af88cb4cc5ecddfc5380ebb9c228695afc11915a0bf82116789"}, + {file = "yarl-1.18.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:a9ca04806f3be0ac6d558fffc2fdf8fcef767e0489d2684a21912cc4ed0cd1b8"}, + {file = "yarl-1.18.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77a6e85b90a7641d2e07184df5557132a337f136250caafc9ccaa4a2a998ca2c"}, + {file = "yarl-1.18.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6333c5a377c8e2f5fae35e7b8f145c617b02c939d04110c76f29ee3676b5f9a5"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:0b3c92fa08759dbf12b3a59579a4096ba9af8dd344d9a813fc7f5070d86bbab1"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:4ac515b860c36becb81bb84b667466885096b5fc85596948548b667da3bf9f24"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:045b8482ce9483ada4f3f23b3774f4e1bf4f23a2d5c912ed5170f68efb053318"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:a4bb030cf46a434ec0225bddbebd4b89e6471814ca851abb8696170adb163985"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:54d6921f07555713b9300bee9c50fb46e57e2e639027089b1d795ecd9f7fa910"}, + {file = "yarl-1.18.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:1d407181cfa6e70077df3377938c08012d18893f9f20e92f7d2f314a437c30b1"}, + {file = "yarl-1.18.3-cp39-cp39-win32.whl", hash = "sha256:ac36703a585e0929b032fbaab0707b75dc12703766d0b53486eabd5139ebadd5"}, + {file = "yarl-1.18.3-cp39-cp39-win_amd64.whl", hash = "sha256:ba87babd629f8af77f557b61e49e7c7cac36f22f871156b91e10a6e9d4f829e9"}, + {file = "yarl-1.18.3-py3-none-any.whl", hash = "sha256:b57f4f58099328dfb26c6a771d09fb20dbbae81d20cfb66141251ea063bd101b"}, + {file = "yarl-1.18.3.tar.gz", hash = "sha256:ac1801c45cbf77b6c99242eeff4fffb5e4e73a800b5c4ad4fc0be5def634d2e1"}, ] [package.dependencies] @@ -5959,4 +5900,4 @@ ingestion-bundle = ["aiofiles", "aioshutil", "beautifulsoup4", "bs4", "markdown" [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "eefaef546effe8004d3dd44729755df20d3bf7f1c57b446fd2ef1444cd0c90c9" +content-hash = "27694176c4c0ec215120e9d7bbe04d426bf2a630d5238edf39772c4b4b11e351" diff --git a/py/pyproject.toml b/py/pyproject.toml index 62c42a812..65088388e 100644 --- a/py/pyproject.toml +++ b/py/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "r2r" readme = "README.md" -version = "3.2.41" +version = "3.3.0" description = "SciPhi R2R" authors = ["Owen Colegrove "] @@ -28,7 +28,7 @@ asyncclick = "^8.1.7.2" click = "^8.0.0" fastapi = "^0.114.0" httpx = "^0.27.0" -litellm = "^1.42.3" # move back to optional after zerox integration is complete +litellm = "^1.52.8" # move back to optional after zerox integration is complete nest-asyncio = "^1.6.0" openai = "^1.11.1" posthog = "^3.5.0" @@ -38,6 +38,10 @@ toml = "^0.10.2" types-requests = "^2.31.0" unstructured-client = "^0.25.5" psycopg-binary = "^3.2.3" +aiosmtplib = "^3.0.2" +types-aiofiles = "^24.1.0.20240626" +aiohttp = "^3.10.10" +typing-extensions = "^4.12.2" # Shared dependencies (optional) aiosqlite = { version = "^0.20.0", optional = true } @@ -80,9 +84,6 @@ pypdf = { version = "^4.2.0", optional = true } pypdf2 = { version = "^3.0.1", optional = true } python-pptx = { version = "^1.0.1", optional = true } python-docx = { version = "^1.1.0", optional = true } -aiosmtplib = "^3.0.2" -types-aiofiles = "^24.1.0.20240626" -aiohttp = "^3.10.10" [tool.poetry.extras] core = [ @@ -131,9 +132,7 @@ ingestion-bundle = [ [tool.poetry.group.dev.dependencies] black = 
"^24.3.0" -codecov = "^2.1.13" colorama = "^0.4.6" -flake8 = "6.1.0" isort = "5.12.0" mypy = "^1.5.1" pre-commit = "^2.9" diff --git a/py/r2r.toml b/py/r2r.toml index 5966c94b5..a44a8e9a0 100644 --- a/py/r2r.toml +++ b/py/r2r.toml @@ -42,32 +42,28 @@ enable_fts = true # whether or not to enable full-text search, e.g `hybrid searc # KG settings batch_size = 256 - [database.kg_creation_settings] - kg_entity_description_prompt = "graphrag_entity_description" - kg_triples_extraction_prompt = "graphrag_triples_extraction_few_shot" + [database.graph_creation_settings] + graph_entity_description_prompt = "graphrag_entity_description" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted fragment_merge_count = 4 # number of fragments to merge into a single extraction - max_knowledge_triples = 100 + max_knowledge_relationships = 100 max_description_input_length = 65536 - generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for triplet extraction + generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for relationshipt extraction - [database.kg_entity_deduplication_settings] - kg_entity_deduplication_type = "by_name" - kg_entity_deduplication_prompt = "graphrag_entity_deduplication" + [database.graph_entity_deduplication_settings] + graph_entity_deduplication_type = "by_name" + graph_entity_deduplication_prompt = "graphrag_entity_deduplication" max_description_input_length = 65536 generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for deduplication - [database.kg_enrichment_settings] + [database.graph_enrichment_settings] community_reports_prompt = "graphrag_community_reports" max_summary_input_length = 65536 generation_config = { model = "openai/gpt-4o-mini" } # and other params, model used for node description and graph clustering leiden_params = {} - [database.kg_search_settings] - entities_level = "document" # set to collection if you've run deduplication - map_system_prompt = "graphrag_map_system" - reduce_system_prompt = "graphrag_reduce_system" + [database.graph_search_settings] generation_config = { model = "openai/gpt-4o-mini" } [embedding] @@ -105,7 +101,9 @@ excluded_parsers = ["mp4"] # document_summary_system_prompt = 'default_system' # document_summary_task_prompt = 'default_summary' # chunks_for_document_summary = 128 -# document_summary_model = "openai/gpt-4o-mini" +document_summary_model = "openai/gpt-4o-mini" +vision_img_model = "openai/gpt-4o" +vision_pdf_model = "openai/gpt-4o" [ingestion.chunk_enrichment_settings] enable_chunk_enrichment = false # disabled by default diff --git a/py/sdk/async_client.py b/py/sdk/async_client.py index fdb5596b9..a9cc70724 100644 --- a/py/sdk/async_client.py +++ b/py/sdk/async_client.py @@ -6,7 +6,7 @@ from shared.abstractions import R2RException from .base.base_client import BaseClient -from .mixins import ( +from .v2 import ( AuthMixins, IngestionMixins, KGMixins, @@ -14,6 +14,18 @@ RetrievalMixins, ServerMixins, ) +from .v3 import ( + ChunksSDK, + CollectionsSDK, + ConversationsSDK, + DocumentsSDK, + GraphsSDK, + IndicesSDK, + PromptsSDK, + RetrievalSDK, + SystemSDK, + UsersSDK, +) class R2RAsyncClient( @@ -27,12 +39,6 @@ class R2RAsyncClient( ): """ Asynchronous client for interacting with the R2R API. - - Args: - base_url (str, optional): The base URL of the R2R API. Defaults to "http://localhost:7272". - prefix (str, optional): The prefix for the API. Defaults to "/v2". 
- custom_client (httpx.AsyncClient, optional): A custom HTTP client. Defaults to None. - timeout (float, optional): The timeout for requests. Defaults to 300.0. """ def __init__( @@ -44,16 +50,27 @@ def __init__( ): super().__init__(base_url, prefix, timeout) self.client = custom_client or httpx.AsyncClient(timeout=timeout) - - async def _make_request(self, method: str, endpoint: str, **kwargs): - url = self._get_full_url(endpoint) + self.chunks = ChunksSDK(self) + self.collections = CollectionsSDK(self) + self.conversations = ConversationsSDK(self) + self.documents = DocumentsSDK(self) + self.graphs = GraphsSDK(self) + self.indices = IndicesSDK(self) + self.prompts = PromptsSDK(self) + self.retrieval = RetrievalSDK(self) + self.system = SystemSDK(self) + self.users = UsersSDK(self) + + async def _make_request( + self, method: str, endpoint: str, version: str = "v2", **kwargs + ): + url = self._get_full_url(endpoint, version) request_args = self._prepare_request_args(endpoint, **kwargs) try: - async with httpx.AsyncClient(timeout=self.timeout) as client: - response = await client.request(method, url, **request_args) - await self._handle_response(response) - return response.json() if response.content else None + response = await self.client.request(method, url, **request_args) + await self._handle_response(response) + return response.json() if response.content else None except httpx.RequestError as e: raise R2RException( status_code=500, @@ -61,9 +78,9 @@ async def _make_request(self, method: str, endpoint: str, **kwargs): ) from e async def _make_streaming_request( - self, method: str, endpoint: str, **kwargs + self, method: str, endpoint: str, version: str = "v2", **kwargs ) -> AsyncGenerator[Any, None]: - url = self._get_full_url(endpoint) + url = self._get_full_url(endpoint, version) request_args = self._prepare_request_args(endpoint, **kwargs) async with httpx.AsyncClient(timeout=self.timeout) as client: diff --git a/py/sdk/base/base_client.py b/py/sdk/base/base_client.py index 096cd69aa..06bacb93d 100644 --- a/py/sdk/base/base_client.py +++ b/py/sdk/base/base_client.py @@ -1,8 +1,36 @@ +import asyncio +import contextlib +from functools import wraps from typing import Optional from shared.abstractions import R2RException +def sync_wrapper(async_func): + """Decorator to convert async methods to sync methods""" + + @wraps(async_func) + def wrapper(*args, **kwargs): + loop = asyncio.get_event_loop() + return loop.run_until_complete(async_func(*args, **kwargs)) + + return wrapper + + +def sync_generator_wrapper(async_gen_func): + """Decorator to convert async generators to sync generators""" + + @wraps(async_gen_func) + def wrapper(*args, **kwargs): + async_gen = async_gen_func(*args, **kwargs) + loop = asyncio.get_event_loop() + with contextlib.suppress(StopAsyncIteration): + while True: + yield loop.run_until_complete(async_gen.__anext__()) + + return wrapper + + class BaseClient: def __init__( self, @@ -28,8 +56,8 @@ def _ensure_authenticated(self): message="Not authenticated. 
Please login first.", ) - def _get_full_url(self, endpoint: str) -> str: - return f"{self.base_url}{self.prefix}/{endpoint}" + def _get_full_url(self, endpoint: str, version: str = "v2") -> str: + return f"{self.base_url}/{version}/{endpoint}" def _prepare_request_args(self, endpoint: str, **kwargs) -> dict: headers = kwargs.pop("headers", {}) @@ -39,7 +67,6 @@ def _prepare_request_args(self, endpoint: str, **kwargs) -> dict: "verify_email", ]: headers.update(self._get_auth_header()) - if ( kwargs.get("params", None) == {} or kwargs.get("params", None) is None diff --git a/py/sdk/mixins/__init__.py b/py/sdk/mixins/__init__.py deleted file mode 100644 index 43ef9d2c0..000000000 --- a/py/sdk/mixins/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from .auth import AuthMixins -from .ingestion import IngestionMixins -from .kg import KGMixins -from .management import ManagementMixins -from .retrieval import RetrievalMixins -from .server import ServerMixins - -__all__ = [ - "AuthMixins", - "IngestionMixins", - "KGMixins", - "ManagementMixins", - "RetrievalMixins", - "ServerMixins", -] diff --git a/py/sdk/models.py b/py/sdk/models.py index cee80c339..277518b92 100644 --- a/py/sdk/models.py +++ b/py/sdk/models.py @@ -1,5 +1,8 @@ from shared.abstractions import ( + ChunkSearchResult, GenerationConfig, + GraphSearchResult, + GraphSearchSettings, HybridSearchSettings, KGCommunityResult, KGCreationSettings, @@ -9,26 +12,16 @@ KGGlobalResult, KGRelationshipResult, KGRunType, - KGSearchMethod, - KGSearchResult, KGSearchResultType, - KGSearchSettings, Message, MessageType, R2RException, R2RSerializable, SearchSettings, Token, - VectorSearchResult, -) -from shared.api.models import ( - KGCreationResponse, - KGEnrichmentResponse, - KGEntityDeduplicationResponse, - RAGResponse, - SearchResponse, - UserResponse, + User, ) +from shared.api.models import CombinedSearchResponse, RAGResponse __all__ = [ "GenerationConfig", @@ -40,22 +33,18 @@ "KGGlobalResult", "KGRelationshipResult", "KGRunType", - "KGSearchMethod", - "KGSearchResult", + "GraphSearchResult", "KGSearchResultType", - "KGSearchSettings", + "GraphSearchSettings", "Message", "MessageType", "R2RException", "R2RSerializable", "Token", - "VectorSearchResult", + "ChunkSearchResult", "SearchSettings", "KGEntityDeduplicationSettings", - "KGEntityDeduplicationResponse", - "KGCreationResponse", - "KGEnrichmentResponse", "RAGResponse", - "SearchResponse", - "UserResponse", + "CombinedSearchResponse", + "User", ] diff --git a/py/sdk/sync_client.py b/py/sdk/sync_client.py index b5371ebb2..d4ec60be6 100644 --- a/py/sdk/sync_client.py +++ b/py/sdk/sync_client.py @@ -1,38 +1,122 @@ import asyncio +import contextlib +import functools +import inspect +from typing import Any from .async_client import R2RAsyncClient -from .utils import SyncClientMetaclass +from .v2 import ( + SyncAuthMixins, + SyncIngestionMixins, + SyncKGMixins, + SyncManagementMixins, + SyncRetrievalMixins, + SyncServerMixins, +) -class R2RClient(R2RAsyncClient, metaclass=SyncClientMetaclass): - """ - Synchronous client for the R2R API. +class R2RClient(R2RAsyncClient): + def __init__(self, *args: Any, **kwargs: Any): + super().__init__(*args, **kwargs) + self._loop = asyncio.new_event_loop() + asyncio.set_event_loop(self._loop) - Args: - base_url (str, optional): The base URL of the R2R API. Defaults to "http://localhost:7272". - prefix (str, optional): The prefix for the API. Defaults to "/v2". - custom_client (httpx.AsyncClient, optional): A custom HTTP client. Defaults to None. 
- timeout (float, optional): The timeout for requests. Defaults to 300.0. - """ + # Store async version of _make_request + self._async_make_request = self._make_request - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) + # Only wrap v3 methods since they're already working + self._wrap_v3_methods() + + # Override v2 methods with sync versions + self._override_v2_methods() + + def _make_sync_request(self, *args, **kwargs): + """Sync version of _make_request for v2 methods""" + return self._loop.run_until_complete( + self._async_make_request(*args, **kwargs) + ) + + def _override_v2_methods(self): + """ + Replace async v2 methods with sync versions + This is really ugly, but it's the only way to make it work once we + remove v2, we can just resort to the metaclass approach that is in utils + """ + sync_mixins = { + SyncAuthMixins: ["auth_methods"], + SyncIngestionMixins: ["ingestion_methods"], + SyncKGMixins: ["kg_methods"], + SyncManagementMixins: ["management_methods"], + SyncRetrievalMixins: ["retrieval_methods"], + SyncServerMixins: ["server_methods"], + } + + for sync_class in sync_mixins: + for name, method in sync_class.__dict__.items(): + if not name.startswith("_") and inspect.isfunction(method): + # Create a wrapper that uses sync _make_request + def wrap_method(m): + def wrapped(self, *args, **kwargs): + # Temporarily swap _make_request + original_make_request = self._make_request + self._make_request = self._make_sync_request + try: + return m(self, *args, **kwargs) + finally: + # Restore original _make_request + self._make_request = original_make_request + + return wrapped + + bound_method = wrap_method(method).__get__( + self, self.__class__ + ) + setattr(self, name, bound_method) + + def _wrap_v3_methods(self) -> None: + """Wraps only v3 SDK object methods""" + sdk_objects = [ + self.chunks, + self.collections, + self.conversations, + self.documents, + self.graphs, + self.indices, + self.prompts, + self.retrieval, + self.users, + ] + + for sdk_obj in sdk_objects: + for name in dir(sdk_obj): + if name.startswith("_"): + continue - def _make_streaming_request(self, method: str, endpoint: str, **kwargs): - async_gen = super()._make_streaming_request(method, endpoint, **kwargs) - return self._sync_generator(async_gen) + attr = getattr(sdk_obj, name) + if inspect.iscoroutinefunction(attr): + wrapped = self._make_sync_method(attr) + setattr(sdk_obj, name, wrapped) - def _sync_generator(self, async_gen): - loop = asyncio.get_event_loop() - try: - while True: - yield loop.run_until_complete(async_gen.__anext__()) - except StopAsyncIteration: - pass + def _make_sync_method(self, async_method): + @functools.wraps(async_method) + def wrapped(*args, **kwargs): + return self._loop.run_until_complete(async_method(*args, **kwargs)) - def __enter__(self): - return self + return wrapped - def __exit__(self, exc_type, exc_val, exc_tb): - loop = asyncio.get_event_loop() - loop.run_until_complete(self.close()) + def __del__(self): + if hasattr(self, "_loop") and self._loop is not None: + with contextlib.suppress(Exception): + if not self._loop.is_closed(): + try: + self._loop.run_until_complete(self._async_close()) + except RuntimeError: + # If the event loop is already running, we can't use run_until_complete + if self._loop.is_running(): + self._loop.call_soon_threadsafe(self._sync_close) + else: + asyncio.run_coroutine_threadsafe( + self._async_close(), self._loop + ) + finally: + self._loop.close() diff --git a/py/sdk/utils.py b/py/sdk/utils.py index 
4a64ed171..90877d68c 100644 --- a/py/sdk/utils.py +++ b/py/sdk/utils.py @@ -1,3 +1,8 @@ +""" +Not currently being used due to some grossness with the way that the v2 SDK methods were structured, but +we can return to this once we remove v2 SDK methods. +""" + import asyncio import functools import inspect diff --git a/py/sdk/v2/__init__.py b/py/sdk/v2/__init__.py new file mode 100644 index 000000000..8b845b95d --- /dev/null +++ b/py/sdk/v2/__init__.py @@ -0,0 +1,27 @@ +from .auth import AuthMixins +from .ingestion import IngestionMixins +from .kg import KGMixins +from .management import ManagementMixins +from .retrieval import RetrievalMixins +from .server import ServerMixins +from .sync_auth import SyncAuthMixins +from .sync_ingestion import SyncIngestionMixins +from .sync_kg import SyncKGMixins +from .sync_management import SyncManagementMixins +from .sync_retrieval import SyncRetrievalMixins +from .sync_server import SyncServerMixins + +__all__ = [ + "AuthMixins", + "IngestionMixins", + "KGMixins", + "ManagementMixins", + "RetrievalMixins", + "ServerMixins", + "SyncAuthMixins", + "SyncIngestionMixins", + "SyncKGMixins", + "SyncManagementMixins", + "SyncRetrievalMixins", + "SyncServerMixins", +] diff --git a/py/sdk/mixins/auth.py b/py/sdk/v2/auth.py similarity index 86% rename from py/sdk/mixins/auth.py rename to py/sdk/v2/auth.py index d3ca201ac..e09a80233 100644 --- a/py/sdk/mixins/auth.py +++ b/py/sdk/v2/auth.py @@ -1,11 +1,16 @@ +from __future__ import annotations # for Python 3.10+ + from typing import Optional, Union from uuid import UUID -from ..models import Token, UserResponse +from typing_extensions import deprecated + +from ..models import Token, User class AuthMixins: - async def register(self, email: str, password: str) -> UserResponse: + @deprecated("Use client.users.register() instead") + async def register(self, email: str, password: str) -> User: """ Registers a new user with the given email and password. @@ -14,12 +19,13 @@ async def register(self, email: str, password: str) -> UserResponse: password (str): The password of the user to register. Returns: - UserResponse: The response from the server. + User: The response from the server. """ data = {"email": email, "password": password} return await self._make_request("POST", "register", json=data) # type: ignore - async def verify_email(self, verification_code: str) -> dict: + @deprecated("Use client.users.verify_email() instead") + async def verify_email(self, email: str, verification_code: str) -> dict: """ Verifies the email of a user with the given verification code. @@ -27,12 +33,14 @@ async def verify_email(self, verification_code: str) -> dict: verification_code (str): The verification code to verify the email with. """ + data = {"email": email, "verification_code": verification_code} return await self._make_request( # type: ignore "POST", "verify_email", - json=verification_code, + json=data, ) + @deprecated("Use client.users.login() instead") async def login(self, email: str, password: str) -> dict[str, Token]: """ Attempts to log in a user with the given email and password. @@ -50,6 +58,7 @@ async def login(self, email: str, password: str) -> dict[str, Token]: self._refresh_token = response["results"]["refresh_token"]["token"] return response + @deprecated("Use client.users.logout() instead") async def logout(self) -> dict: """ Logs out the currently authenticated user. 
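Illustrative note: the auth mixin above now wraps each v2 call in a typing_extensions.deprecated decorator that points at the new client.users namespace. A minimal migration sketch, assuming an R2R server at the default http://localhost:7272 and that the synchronous client is importable as shown; the v3 signatures are not part of this diff and may differ.

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

# v2 surface (still works, now flagged @deprecated); credentials are placeholders
client.register("user@example.com", "secret-password")
client.login("user@example.com", "secret-password")
client.logout()

# v3 surface, as named in the deprecation messages above
client.users.login("user@example.com", "secret-password")
print(client.users.retrieve())   # current user, per the note on user()
client.users.logout()
```

The v2 methods keep working for now; they simply steer callers toward the v3 namespaces.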
@@ -62,15 +71,17 @@ async def logout(self) -> dict: self._refresh_token = None return response - async def user(self) -> UserResponse: + @deprecated("Use client.users.retrieve() instead") + async def user(self) -> User: """ Retrieves the user information for the currently authenticated user. Returns: - UserResponse: The response from the server. + User: The response from the server. """ return await self._make_request("GET", "user") # type: ignore + @deprecated("Use client.users.update() instead") async def update_user( self, user_id: Union[str, UUID], @@ -79,7 +90,7 @@ async def update_user( name: Optional[str] = None, bio: Optional[str] = None, profile_picture: Optional[str] = None, - ) -> UserResponse: + ) -> User: """ Updates the profile information for the currently authenticated user. @@ -92,7 +103,7 @@ async def update_user( profile_picture (str, optional): The updated profile picture URL for the user. Returns: - UserResponse: The response from the server. + User: The response from the server. """ data = { "user_id": user_id, @@ -105,6 +116,7 @@ async def update_user( data = {k: v for k, v in data.items() if v is not None} return await self._make_request("PUT", "user", json=data) # type: ignore + @deprecated("Use client.users.refresh_token() instead") async def refresh_access_token(self) -> dict[str, Token]: """ Refreshes the access token for the currently authenticated user. @@ -119,6 +131,7 @@ async def refresh_access_token(self) -> dict[str, Token]: self._refresh_token = response["results"]["refresh_token"]["token"] return response + @deprecated("Use client.users.change_password() instead") async def change_password( self, current_password: str, new_password: str ) -> dict: @@ -138,6 +151,7 @@ async def change_password( } return await self._make_request("POST", "change_password", json=data) # type: ignore + @deprecated("Use client.users.request_password_reset() instead") async def request_password_reset(self, email: str) -> dict: """ Requests a password reset for the user with the given email. 
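Illustrative note: login() stores the access and refresh tokens on the client (see the method body above), which is what refresh_access_token() and change_password() rely on. A sketch of that token lifecycle using the deprecated v2 helpers; the credentials are placeholders.

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")
client.login("user@example.com", "secret-password")   # stores access/refresh tokens

# Rotate the access token using the stored refresh token
client.refresh_access_token()   # v2, deprecated -> client.users.refresh_token()

# Change the password, then sign back in with the new credentials
client.change_password("secret-password", "new-secret-password")
client.logout()
client.login("user@example.com", "new-secret-password")
```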
@@ -152,6 +166,7 @@ async def request_password_reset(self, email: str) -> dict: "POST", "request_password_reset", json=email ) + @deprecated("Use client.users.reset_password() instead") async def confirm_password_reset( self, reset_token: str, new_password: str ) -> dict: @@ -168,6 +183,7 @@ async def confirm_password_reset( data = {"reset_token": reset_token, "new_password": new_password} return await self._make_request("POST", "reset_password", json=data) # type: ignore + @deprecated("Use client.users.login_with_token() instead") async def login_with_token( self, access_token: str, @@ -197,6 +213,7 @@ async def login_with_token( self._refresh_token = None raise ValueError("Invalid tokens provided") + @deprecated("") async def get_user_verification_code( self, user_id: Union[str, UUID] ) -> dict: diff --git a/py/sdk/mixins/ingestion.py b/py/sdk/v2/ingestion.py similarity index 93% rename from py/sdk/mixins/ingestion.py rename to py/sdk/v2/ingestion.py index 980036e03..41579e0bb 100644 --- a/py/sdk/mixins/ingestion.py +++ b/py/sdk/v2/ingestion.py @@ -1,13 +1,18 @@ +from __future__ import annotations # for Python 3.10+ + import json import os from contextlib import ExitStack from typing import Optional, Union from uuid import UUID +from typing_extensions import deprecated + from shared.abstractions import IndexMeasure, IndexMethod, VectorTableName class IngestionMixins: + @deprecated("Use client.documents.create() instead") async def ingest_files( self, file_paths: list[str], @@ -93,6 +98,7 @@ async def ingest_files( "POST", "ingest_files", data=data, files=files_tuples ) + @deprecated("Use client.documents.update() instead") async def update_files( self, file_paths: list[str], @@ -163,6 +169,7 @@ async def update_files( "POST", "update_files", data=data, files=files ) + @deprecated("Use client.chunks.create() instead") async def ingest_chunks( self, chunks: list[dict], @@ -204,10 +211,11 @@ async def ingest_chunks( return await self._make_request("POST", "ingest_chunks", json=data) # type: ignore + @deprecated("Use client.chunks.update() instead") async def update_chunks( self, document_id: UUID, - extraction_id: UUID, + chunk_id: UUID, text: str, metadata: Optional[dict] = None, run_with_orchestration: Optional[bool] = None, @@ -217,7 +225,7 @@ async def update_chunks( Args: document_id (UUID): The ID of the document containing the chunk. - extraction_id (UUID): The ID of the chunk to update. + chunk_id (UUID): The ID of the chunk to update. text (str): The new text content of the chunk. metadata (Optional[dict]): Metadata dictionary for the chunk. run_with_orchestration (Optional[bool]): Whether to run the update through orchestration. 
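Illustrative note: the ingestion mixin keeps its v2 surface but marks it deprecated in favor of client.documents.create()/update() and client.chunks.create()/update(). A minimal ingestion sketch using only parameters shown in this hunk; the file path and chunk payload shape are assumptions, and the v3 signatures are not shown in this diff.

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

# v2 (deprecated): ingest files by path; the path is a placeholder
client.ingest_files(file_paths=["data/report.pdf"])

# v2 (deprecated): ingest pre-chunked text directly
client.ingest_chunks(chunks=[{"text": "R2R is a RAG engine."}])
```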
@@ -235,11 +243,12 @@ async def update_chunks( # Remove None values from payload data = {k: v for k, v in data.items() if v is not None} - return await self._make_request("PUT", f"update_chunk/{document_id}/{extraction_id}", json=data) # type: ignore + return await self._make_request("PUT", f"update_chunk/{document_id}/{chunk_id}", json=data) # type: ignore + @deprecated("Use client.indices.create() instead") async def create_vector_index( self, - table_name: VectorTableName = VectorTableName.VECTORS, + table_name: VectorTableName = VectorTableName.CHUNKS, index_method: IndexMethod = IndexMethod.hnsw, index_measure: IndexMeasure = IndexMeasure.cosine_distance, index_arguments: Optional[dict] = None, @@ -274,9 +283,10 @@ async def create_vector_index( "POST", "create_vector_index", json=data ) + @deprecated("Use client.indices.list() instead") async def list_vector_indices( self, - table_name: VectorTableName = VectorTableName.VECTORS, + table_name: VectorTableName = VectorTableName.CHUNKS, ) -> dict: """ List all vector indices for a given table. @@ -292,10 +302,11 @@ async def list_vector_indices( "GET", "list_vector_indices", params=params ) + @deprecated("Use client.indices.delete() instead") async def delete_vector_index( self, index_name: str, - table_name: VectorTableName = VectorTableName.VECTORS, + table_name: VectorTableName = VectorTableName.CHUNKS, concurrently: bool = True, ) -> dict: """ @@ -318,6 +329,7 @@ async def delete_vector_index( "DELETE", "delete_vector_index", json=data ) + @deprecated("Use client.documents.update() instead") async def update_document_metadata( self, document_id: Union[str, UUID], diff --git a/py/sdk/mixins/kg.py b/py/sdk/v2/kg.py similarity index 89% rename from py/sdk/mixins/kg.py rename to py/sdk/v2/kg.py index 87c090636..e906c4e8a 100644 --- a/py/sdk/mixins/kg.py +++ b/py/sdk/v2/kg.py @@ -4,7 +4,6 @@ from ..models import ( KGCreationSettings, KGEnrichmentSettings, - KGEntityDeduplicationResponse, KGEntityDeduplicationSettings, KGRunType, ) @@ -55,7 +54,7 @@ async def enrich_graph( run_type (Optional[Union[str, KGRunType]]): The type of run to perform. kg_enrichment_settings (Optional[Union[dict, KGEnrichmentSettings]]): Settings for the graph enrichment process. Returns: - KGEnrichmentResponse: Results of the graph enrichment process. + Results of the graph enrichment process. """ if isinstance(kg_enrichment_settings, KGEnrichmentSettings): kg_enrichment_settings = kg_enrichment_settings.model_dump() @@ -107,41 +106,41 @@ async def get_triples( self, collection_id: Optional[Union[UUID, str]] = None, entity_names: Optional[list[str]] = None, - triple_ids: Optional[list[str]] = None, + relationship_ids: Optional[list[str]] = None, offset: Optional[int] = None, limit: Optional[int] = None, ) -> dict: """ - Retrieve triples from the knowledge graph. + Retrieve relationships from the knowledge graph. Args: - collection_id (str): The ID of the collection to retrieve triples from. + collection_id (str): The ID of the collection to retrieve relationships from. offset (int): The offset for pagination. limit (int): The limit for pagination. entity_names (Optional[List[str]]): Optional list of entity names to filter by. - triple_ids (Optional[List[str]]): Optional list of triple IDs to filter by. + relationship_ids (Optional[List[str]]): Optional list of relationship IDs to filter by. Returns: - dict: A dictionary containing the retrieved triples and total count. + dict: A dictionary containing the retrieved relationships and total count. 
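Illustrative note: the vector-index helpers now default to VectorTableName.CHUNKS (renamed from VECTORS) and are deprecated in favor of client.indices.create()/list()/delete(). A sketch that relies only on the defaults shown above; the index name passed to delete is a placeholder.

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

# Defaults per this hunk: table_name=VectorTableName.CHUNKS, HNSW, cosine distance
client.create_vector_index()

# Inspect existing indices on the chunks table, then drop one by name
print(client.list_vector_indices())
client.delete_vector_index(index_name="my_chunks_index")  # placeholder name
```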
""" params = { "collection_id": collection_id, "entity_names": entity_names, - "triple_ids": triple_ids, + "relationship_ids": relationship_ids, "offset": offset, "limit": limit, } params = {k: v for k, v in params.items() if v is not None} - return await self._make_request("GET", "triples", params=params) # type: ignore + return await self._make_request("GET", "relationships", params=params) # type: ignore async def get_communities( self, collection_id: Optional[Union[UUID, str]] = None, levels: Optional[list[int]] = None, - community_numbers: Optional[list[int]] = None, + community_ids: Optional[list[UUID]] = None, offset: Optional[int] = None, limit: Optional[int] = None, ) -> dict: @@ -153,7 +152,7 @@ async def get_communities( offset (int): The offset for pagination. limit (int): The limit for pagination. levels (Optional[List[int]]): Optional list of levels to filter by. - community_numbers (Optional[List[int]]): Optional list of community numbers to filter by. + community_ids (Optional[List[int]]): Optional list of community numbers to filter by. Returns: dict: A dictionary containing the retrieved communities. @@ -162,7 +161,7 @@ async def get_communities( params = { "collection_id": collection_id, "levels": levels, - "community_numbers": community_numbers, + "community_ids": community_ids, "offset": offset, "limit": limit, } @@ -216,7 +215,7 @@ async def deduplicate_entities( deduplication_settings: Optional[ Union[dict, KGEntityDeduplicationSettings] ] = None, - ) -> KGEntityDeduplicationResponse: + ): """ Deduplicate entities in the knowledge graph. Args: @@ -245,9 +244,9 @@ async def delete_graph_for_collection( Args: collection_id (Union[UUID, str]): The ID of the collection to delete the graph for. - cascade (bool): Whether to cascade the deletion, and delete entities and triples belonging to the collection. + cascade (bool): Whether to cascade the deletion, and delete entities and relationships belonging to the collection. - NOTE: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection. + NOTE: Setting this flag to true will delete entities and relationships for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and relationships for all documents in the collection. 
""" data = { diff --git a/py/sdk/mixins/management.py b/py/sdk/v2/management.py similarity index 92% rename from py/sdk/mixins/management.py rename to py/sdk/v2/management.py index 21e304fbe..876108ec0 100644 --- a/py/sdk/mixins/management.py +++ b/py/sdk/v2/management.py @@ -1,11 +1,16 @@ +from __future__ import annotations # for Python 3.10+ + import json from typing import Any, Optional, Union from uuid import UUID +from typing_extensions import deprecated + from ..models import Message class ManagementMixins: + @deprecated("Use client.prompts.update() instead") async def update_prompt( self, name: str, @@ -31,6 +36,7 @@ async def update_prompt( return await self._make_request("POST", "update_prompt", json=data) # type: ignore + @deprecated("Use client.prompts.create() instead") async def add_prompt( self, name: str, @@ -55,6 +61,7 @@ async def add_prompt( } return await self._make_request("POST", "add_prompt", json=data) # type: ignore + @deprecated("Use client.prompts.retrieve() instead") async def get_prompt( self, prompt_name: str, @@ -81,6 +88,7 @@ async def get_prompt( "GET", f"get_prompt/{prompt_name}", params=params ) + @deprecated("Use client.prompts.list() instead") async def get_all_prompts(self) -> dict: """ Get all prompts from the system. @@ -90,6 +98,7 @@ async def get_all_prompts(self) -> dict: """ return await self._make_request("GET", "get_all_prompts") # type: ignore + @deprecated("Use client.prompts.delete() instead") async def delete_prompt(self, prompt_name: str) -> dict: """ Delete a prompt from the system. @@ -104,6 +113,9 @@ async def delete_prompt(self, prompt_name: str) -> dict: "DELETE", f"delete_prompt/{prompt_name}" ) + @deprecated( + "This method is deprecated. New, improved analytics features will be added in a future release." + ) async def analytics( self, filter_criteria: Optional[Union[dict, str]] = None, @@ -133,6 +145,7 @@ async def analytics( return await self._make_request("GET", "analytics", params=params) # type: ignore + @deprecated("Use client.system.settings() instead") async def app_settings(self) -> dict: """ Get the configuration settings for the app. 
@@ -142,6 +155,7 @@ async def app_settings(self) -> dict: """ return await self._make_request("GET", "app_settings") # type: ignore + @deprecated("Use client.users.list() instead") async def users_overview( self, user_ids: Optional[list[str]] = None, @@ -168,6 +182,7 @@ async def users_overview( "GET", "users_overview", params=params ) + @deprecated("Use client..delete() instead") async def delete( self, filters: dict, @@ -187,6 +202,7 @@ async def delete( "DELETE", "delete", params={"filters": filters_json} ) or {"results": {}} + @deprecated("Use client.documents.download() instead") async def download_file( self, document_id: Union[str, UUID], @@ -204,6 +220,7 @@ async def download_file( "GET", f"download_file/{str(document_id)}" ) + @deprecated("Use client.documents.list() instead") async def documents_overview( self, document_ids: Optional[list[Union[UUID, str]]] = None, @@ -233,6 +250,7 @@ async def documents_overview( "GET", "documents_overview", params=params ) + @deprecated("Use client.documents.list_chunks() instead") async def document_chunks( self, document_id: str, @@ -265,6 +283,7 @@ async def document_chunks( "GET", f"document_chunks/{document_id}", params=params ) + @deprecated("Use client.collections.list() instead") async def collections_overview( self, collection_ids: Optional[list[str]] = None, @@ -293,6 +312,7 @@ async def collections_overview( "GET", "collections_overview", params=params ) + @deprecated("Use client.collections.create() instead") async def create_collection( self, name: str, @@ -316,6 +336,7 @@ async def create_collection( "POST", "create_collection", json=data ) + @deprecated("Use client.collections.retrieve() instead") async def get_collection( self, collection_id: Union[str, UUID], @@ -333,6 +354,7 @@ async def get_collection( "GET", f"get_collection/{str(collection_id)}" ) + @deprecated("Use client.collections.update() instead") async def update_collection( self, collection_id: Union[str, UUID], @@ -360,6 +382,7 @@ async def update_collection( "PUT", "update_collection", json=data ) + @deprecated("Use client.collections.delete() instead") async def delete_collection( self, collection_id: Union[str, UUID], @@ -377,6 +400,7 @@ async def delete_collection( "DELETE", f"delete_collection/{str(collection_id)}" ) + @deprecated("Use client.users.delete() instead") async def delete_user( self, user_id: str, @@ -404,6 +428,7 @@ async def delete_user( "DELETE", f"user/{user_id}", json=params ) + @deprecated("Use client.collections.list() instead") async def list_collections( self, offset: Optional[int] = None, @@ -428,6 +453,7 @@ async def list_collections( "GET", "list_collections", params=params ) + @deprecated("Use client.collections.add_user() instead") async def add_user_to_collection( self, user_id: Union[str, UUID], @@ -451,6 +477,7 @@ async def add_user_to_collection( "POST", "add_user_to_collection", json=data ) + @deprecated("Use client.collections.remove_user() instead") async def remove_user_from_collection( self, user_id: Union[str, UUID], @@ -474,6 +501,7 @@ async def remove_user_from_collection( "POST", "remove_user_from_collection", json=data ) + @deprecated("Use client.collections.list_users() instead") async def get_users_in_collection( self, collection_id: Union[str, UUID], @@ -502,6 +530,7 @@ async def get_users_in_collection( params=params, ) + @deprecated("Use client.users.list_collections() instead") async def user_collections( self, user_id: Union[str, UUID], @@ -531,6 +560,7 @@ async def user_collections( "GET", 
f"user_collections/{str(user_id)}", params=params ) + @deprecated("Use client.collections.add_document() instead") async def assign_document_to_collection( self, document_id: Union[str, UUID], @@ -555,6 +585,7 @@ async def assign_document_to_collection( ) # TODO: Verify that this method is implemented, also, should be a PUT request + @deprecated("Use client.collections.remove_document() instead") async def remove_document_from_collection( self, document_id: Union[str, UUID], @@ -578,6 +609,7 @@ async def remove_document_from_collection( "POST", "remove_document_from_collection", json=data ) + @deprecated("Use client.documents.list_collections() instead") async def document_collections( self, document_id: Union[str, UUID], @@ -609,6 +641,7 @@ async def document_collections( "GET", f"document_collections/{str(document_id)}" ) + @deprecated("Use client.collections.list_documents() instead") async def documents_in_collection( self, collection_id: Union[str, UUID], @@ -635,6 +668,7 @@ async def documents_in_collection( "GET", f"collection/{str(collection_id)}/documents", params=params ) + @deprecated("Use client.conversations.list() instead") async def conversations_overview( self, conversation_ids: Optional[list[Union[UUID, str]]] = None, @@ -663,6 +697,7 @@ async def conversations_overview( "GET", "conversations_overview", params=params ) + @deprecated("Use client.conversations.retrieve() instead") async def get_conversation( self, conversation_id: Union[str, UUID], @@ -683,6 +718,7 @@ async def get_conversation( "GET", f"get_conversation/{str(conversation_id)}{query_params}" ) + @deprecated("Use client.conversations.create() instead") async def create_conversation(self) -> dict: """ Create a new conversation. @@ -692,6 +728,7 @@ async def create_conversation(self) -> dict: """ return await self._make_request("POST", "create_conversation") # type: ignore + @deprecated("Use client.conversations.add_message() instead") async def add_message( self, conversation_id: Union[str, UUID], @@ -725,6 +762,7 @@ async def add_message( "POST", f"add_message/{str(conversation_id)}", data=data ) + @deprecated("Use client.conversations.update_message() instead") async def update_message( self, message_id: str, @@ -763,6 +801,7 @@ async def update_message_metadata( "PATCH", f"messages/{message_id}/metadata", json=metadata ) + @deprecated("Use client.conversations.list_branches() instead") async def branches_overview( self, conversation_id: Union[str, UUID], @@ -822,6 +861,7 @@ async def branches_overview( # """ # return await self._make_request("POST", f"branch_at_message/{str(conversation_id)}/{message_id}") # type: ignore + @deprecated("Use client.conversations.delete() instead") async def delete_conversation( self, conversation_id: Union[str, UUID], diff --git a/py/sdk/mixins/retrieval.py b/py/sdk/v2/retrieval.py similarity index 62% rename from py/sdk/mixins/retrieval.py rename to py/sdk/v2/retrieval.py index 0b08479f8..747058ca7 100644 --- a/py/sdk/mixins/retrieval.py +++ b/py/sdk/v2/retrieval.py @@ -1,12 +1,15 @@ +from __future__ import annotations # for Python 3.10+ + import logging -from typing import AsyncGenerator, Optional, Union +from typing import AsyncGenerator, Optional + +from typing_extensions import deprecated from ..models import ( GenerationConfig, - KGSearchSettings, + GraphSearchSettings, Message, RAGResponse, - SearchResponse, SearchSettings, ) @@ -17,15 +20,15 @@ class RetrievalMixins: async def search_documents( self, query: str, - settings: Optional[Union[dict, SearchSettings]] = 
None, - ) -> SearchResponse: + settings: Optional[dict] = None, + ): """ Conduct a vector and/or KG search. Args: query (str): The query to search for. - vector_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. - kg_search_settings (Optional[Union[dict, KGSearchSettings]]): KG search settings. + chunk_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. + graph_search_settings (Optional[Union[dict, GraphSearchSettings]]): KG search settings. Returns: SearchResponse: The search response. @@ -39,41 +42,45 @@ async def search_documents( } return await self._make_request("POST", "search_documents", json=data) # type: ignore + @deprecated("Use client.retrieval.search() instead") async def search( self, query: str, - vector_search_settings: Optional[Union[dict, SearchSettings]] = None, - kg_search_settings: Optional[Union[dict, KGSearchSettings]] = None, - ) -> SearchResponse: + chunk_search_settings: Optional[dict | SearchSettings] = None, + graph_search_settings: Optional[dict | GraphSearchSettings] = None, + ): """ Conduct a vector and/or KG search. Args: query (str): The query to search for. - vector_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. - kg_search_settings (Optional[Union[dict, KGSearchSettings]]): KG search settings. + chunk_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. + graph_search_settings (Optional[Union[dict, GraphSearchSettings]]): KG search settings. Returns: - SearchResponse: The search response. + CombinedSearchResponse: The search response. """ - if vector_search_settings and not isinstance( - vector_search_settings, dict + if chunk_search_settings and not isinstance( + chunk_search_settings, dict + ): + chunk_search_settings = chunk_search_settings.model_dump() + if graph_search_settings and not isinstance( + graph_search_settings, dict ): - vector_search_settings = vector_search_settings.model_dump() - if kg_search_settings and not isinstance(kg_search_settings, dict): - kg_search_settings = kg_search_settings.model_dump() + graph_search_settings = graph_search_settings.model_dump() data = { "query": query, - "vector_search_settings": vector_search_settings, - "kg_search_settings": kg_search_settings, + "chunk_search_settings": chunk_search_settings, + "graph_search_settings": graph_search_settings, } return await self._make_request("POST", "search", json=data) # type: ignore + @deprecated("Use client.retrieval.completion() instead") async def completion( self, - messages: list[Union[dict, Message]], - generation_config: Optional[Union[dict, GenerationConfig]] = None, + messages: list[dict | Message], + generation_config: Optional[dict | GenerationConfig] = None, ): cast_messages: list[Message] = [ Message(**msg) if isinstance(msg, dict) else msg @@ -90,23 +97,24 @@ async def completion( return await self._make_request("POST", "completion", json=data) # type: ignore + @deprecated("Use client.retrieval.rag() instead") async def rag( self, query: str, - rag_generation_config: Optional[Union[dict, GenerationConfig]] = None, - vector_search_settings: Optional[Union[dict, SearchSettings]] = None, - kg_search_settings: Optional[Union[dict, KGSearchSettings]] = None, + rag_generation_config: Optional[dict | GenerationConfig] = None, + chunk_search_settings: Optional[dict | SearchSettings] = None, + graph_search_settings: Optional[dict | GraphSearchSettings] = None, task_prompt_override: Optional[str] = None, include_title_if_available: 
Optional[bool] = False, - ) -> Union[RAGResponse, AsyncGenerator[RAGResponse, None]]: + ) -> RAGResponse | AsyncGenerator[RAGResponse, None]: """ Conducts a Retrieval Augmented Generation (RAG) search with the given query. Args: query (str): The query to search for. rag_generation_config (Optional[Union[dict, GenerationConfig]]): RAG generation configuration. - vector_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. - kg_search_settings (Optional[Union[dict, KGSearchSettings]]): KG search settings. + chunk_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. + graph_search_settings (Optional[Union[dict, GraphSearchSettings]]): KG search settings. task_prompt_override (Optional[str]): Task prompt override. include_title_if_available (Optional[bool]): Include the title if available. @@ -117,18 +125,20 @@ async def rag( rag_generation_config, dict ): rag_generation_config = rag_generation_config.model_dump() - if vector_search_settings and not isinstance( - vector_search_settings, dict + if chunk_search_settings and not isinstance( + chunk_search_settings, dict ): - vector_search_settings = vector_search_settings.model_dump() - if kg_search_settings and not isinstance(kg_search_settings, dict): - kg_search_settings = kg_search_settings.model_dump() + chunk_search_settings = chunk_search_settings.model_dump() + if graph_search_settings and not isinstance( + graph_search_settings, dict + ): + graph_search_settings = graph_search_settings.model_dump() data = { "query": query, "rag_generation_config": rag_generation_config, - "vector_search_settings": vector_search_settings, - "kg_search_settings": kg_search_settings, + "chunk_search_settings": chunk_search_settings, + "graph_search_settings": graph_search_settings, "task_prompt_override": task_prompt_override, "include_title_if_available": include_title_if_available, } @@ -140,27 +150,28 @@ async def rag( else: return await self._make_request("POST", "rag", json=data) # type: ignore + @deprecated("Use client.retrieval.agent() instead") async def agent( self, - message: Optional[Union[dict, Message]] = None, - rag_generation_config: Optional[Union[dict, GenerationConfig]] = None, - vector_search_settings: Optional[Union[dict, SearchSettings]] = None, - kg_search_settings: Optional[Union[dict, KGSearchSettings]] = None, + message: Optional[dict | Message] = None, + rag_generation_config: Optional[dict | GenerationConfig] = None, + chunk_search_settings: Optional[dict | SearchSettings] = None, + graph_search_settings: Optional[dict | GraphSearchSettings] = None, task_prompt_override: Optional[str] = None, include_title_if_available: Optional[bool] = False, conversation_id: Optional[str] = None, branch_id: Optional[str] = None, # TODO - Deprecate messages - messages: Optional[Union[dict, Message]] = None, - ) -> Union[list[Message], AsyncGenerator[Message, None]]: + messages: Optional[dict | Message] = None, + ) -> list[Message] | AsyncGenerator[Message, None]: """ Performs a single turn in a conversation with a RAG agent. Args: messages (List[Union[dict, Message]]): The messages to send to the agent. rag_generation_config (Optional[Union[dict, GenerationConfig]]): RAG generation configuration. - vector_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. - kg_search_settings (Optional[Union[dict, KGSearchSettings]]): KG search settings. + chunk_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. 
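Illustrative note: rag(), and search()/agent() around it, now accept chunk_search_settings and graph_search_settings in place of vector_search_settings and kg_search_settings, either as plain dicts or as the SearchSettings/GraphSearchSettings models imported above. A sketch with dict settings; the field names inside the dicts are illustrative, not taken from this diff.

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

response = client.rag(
    query="What does the report conclude?",
    rag_generation_config={"model": "openai/gpt-4o-mini", "stream": False},
    chunk_search_settings={"use_hybrid_search": True},  # was vector_search_settings
    graph_search_settings=None,                         # was kg_search_settings
)
print(response)
```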
+ graph_search_settings (Optional[Union[dict, GraphSearchSettings]]): KG search settings. task_prompt_override (Optional[str]): Task prompt override. include_title_if_available (Optional[bool]): Include the title if available. @@ -175,17 +186,19 @@ async def agent( rag_generation_config, dict ): rag_generation_config = rag_generation_config.model_dump() - if vector_search_settings and not isinstance( - vector_search_settings, dict + if chunk_search_settings and not isinstance( + chunk_search_settings, dict + ): + chunk_search_settings = chunk_search_settings.model_dump() + if graph_search_settings and not isinstance( + graph_search_settings, dict ): - vector_search_settings = vector_search_settings.model_dump() - if kg_search_settings and not isinstance(kg_search_settings, dict): - kg_search_settings = kg_search_settings.model_dump() + graph_search_settings = graph_search_settings.model_dump() data = { "rag_generation_config": rag_generation_config or {}, - "vector_search_settings": vector_search_settings or {}, - "kg_search_settings": kg_search_settings, + "chunk_search_settings": chunk_search_settings or {}, + "graph_search_settings": graph_search_settings, "task_prompt_override": task_prompt_override, "include_title_if_available": include_title_if_available, "conversation_id": conversation_id, @@ -215,6 +228,7 @@ async def agent( else: return await self._make_request("POST", "agent", json=data) # type: ignore + @deprecated("Use client.retrieval.embedding() instead") async def embedding( self, content: str, diff --git a/py/sdk/mixins/server.py b/py/sdk/v2/server.py similarity index 86% rename from py/sdk/mixins/server.py rename to py/sdk/v2/server.py index f85afd1c2..a88f6298a 100644 --- a/py/sdk/mixins/server.py +++ b/py/sdk/v2/server.py @@ -1,7 +1,12 @@ +from __future__ import annotations # for Python 3.10+ + from typing import Optional +from typing_extensions import deprecated + class ServerMixins: + @deprecated("Use client.system.health() instead") async def health(self) -> dict: return await self._make_request("GET", "health") # type: ignore @@ -14,6 +19,7 @@ async def server_stats(self) -> dict: """ return await self._make_request("GET", "server_stats") # type: ignore + @deprecated("Use client.system.logs() instead") async def logs( self, offset: Optional[int] = None, diff --git a/py/sdk/v2/sync_auth.py b/py/sdk/v2/sync_auth.py new file mode 100644 index 000000000..f53cb1a1e --- /dev/null +++ b/py/sdk/v2/sync_auth.py @@ -0,0 +1,244 @@ +from __future__ import annotations # for Python 3.10+ + +from typing import Optional, Union +from uuid import UUID + +from typing_extensions import deprecated + +from ..models import Token, User + + +class SyncAuthMixins: + @deprecated("Use client.users.register() instead") + def register(self, email: str, password: str) -> User: + """ + Registers a new user with the given email and password. + + Args: + email (str): The email of the user to register. + password (str): The password of the user to register. + + Returns: + User: The response from the server. + """ + data = {"email": email, "password": password} + return self._make_request("POST", "register", json=data) # type: ignore + + @deprecated("Use client.users.verify_email() instead") + def verify_email(self, email: str, verification_code: str) -> dict: + """ + Verifies the email of a user with the given verification code. + + Args: + verification_code (str): The verification code to verify the email with. 
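Illustrative note: the server mixin's health() and logs() are deprecated in favor of client.system.health() and client.system.logs(), while server_stats() is untouched in this diff. A quick status-check sketch using the v2 surface.

```python
from r2r import R2RClient

client = R2RClient("http://localhost:7272")

print(client.health())        # v2, deprecated -> client.system.health()
print(client.server_stats())  # not deprecated in this diff
print(client.logs())          # v2, deprecated -> client.system.logs()
```

One thing worth double-checking: self.system is absent from the sdk_objects list in _wrap_v3_methods (sync_client.py above), so on the synchronous R2RClient the v3 client.system.* methods would still come back as coroutines unless they are awaited on the async client.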
+ + """ + data = {"email": email, "verification_code": verification_code} + return self._make_request( # type: ignore + "POST", + "verify_email", + json=data, + ) + + @deprecated("Use client.users.login() instead") + def login(self, email: str, password: str) -> dict[str, Token]: + """ + Attempts to log in a user with the given email and password. + + Args: + email (str): The email of the user to log in. + password (str): The password of the user to log in. + + Returns: + dict[str, Token]: The access and refresh tokens from the server. + """ + data = {"username": email, "password": password} + response = self._make_request("POST", "login", data=data) # type: ignore + self.access_token = response["results"]["access_token"]["token"] + self._refresh_token = response["results"]["refresh_token"]["token"] + return response + + @deprecated("Use client.users.logout() instead") + def logout(self) -> dict: + """ + Logs out the currently authenticated user. + + Returns: + dict: The response from the server. + """ + response = self._make_request("POST", "logout") # type: ignore + self.access_token = None + self._refresh_token = None + return response + + @deprecated("Use client.users.retrieve() instead") + def user(self) -> User: + """ + Retrieves the user information for the currently authenticated user. + + Returns: + User: The response from the server. + """ + return self._make_request("GET", "user") # type: ignore + + @deprecated("Use client.users.update() instead") + def update_user( + self, + user_id: Union[str, UUID], + email: Optional[str] = None, + is_superuser: Optional[bool] = None, + name: Optional[str] = None, + bio: Optional[str] = None, + profile_picture: Optional[str] = None, + ) -> User: + """ + Updates the profile information for the currently authenticated user. + + Args: + user_id (Union[str, UUID]): The ID of the user to update. + email (str, optional): The updated email for the user. + is_superuser (bool, optional): The updated superuser status for the user. + name (str, optional): The updated name for the user. + bio (str, optional): The updated bio for the user. + profile_picture (str, optional): The updated profile picture URL for the user. + + Returns: + User: The response from the server. + """ + data = { + "user_id": user_id, + "email": email, + "is_superuser": is_superuser, + "name": name, + "bio": bio, + "profile_picture": profile_picture, + } + data = {k: v for k, v in data.items() if v is not None} + return self._make_request("PUT", "user", json=data) # type: ignore + + @deprecated("Use client.users.refresh_token() instead") + def refresh_access_token(self) -> dict[str, Token]: + """ + Refreshes the access token for the currently authenticated user. + + Returns: + dict[str, Token]: The access and refresh tokens from the server. + """ + response = self._make_request( # type: ignore + "POST", "refresh_access_token", json=self._refresh_token + ) + self.access_token = response["results"]["access_token"]["token"] + self._refresh_token = response["results"]["refresh_token"]["token"] + return response + + @deprecated("Use client.users.change_password() instead") + def change_password( + self, current_password: str, new_password: str + ) -> dict: + """ + Changes the password of the currently authenticated user. + + Args: + current_password (str): The current password of the user. + new_password (str): The new password to set for the user. + + Returns: + dict: The response from the server. 
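+
+        Example (illustrative; assumes an authenticated client):
+            >>> client.change_password("old-password", "new-password")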
+ """ + data = { + "current_password": current_password, + "new_password": new_password, + } + return self._make_request("POST", "change_password", json=data) # type: ignore + + @deprecated("Use client.users.request_password_reset() instead") + def request_password_reset(self, email: str) -> dict: + """ + Requests a password reset for the user with the given email. + + Args: + email (str): The email of the user to request a password reset for. + + Returns: + dict: The response from the server. + """ + return self._make_request( # type: ignore + "POST", "request_password_reset", json=email + ) + + @deprecated("Use client.users.reset_password() instead") + def confirm_password_reset( + self, reset_token: str, new_password: str + ) -> dict: + """ + Confirms a password reset for the user with the given reset token. + + Args: + reset_token (str): The reset token to confirm the password reset with. + new_password (str): The new password to set for the user. + + Returns: + dict: The response from the server. + """ + data = {"reset_token": reset_token, "new_password": new_password} + return self._make_request("POST", "reset_password", json=data) # type: ignore + + @deprecated("Use client.users.login_with_token() instead") + def login_with_token( + self, + access_token: str, + ) -> dict[str, Token]: + """ + Logs in a user using existing access and refresh tokens. + + Args: + access_token (str): The existing access token. + refresh_token (str): The existing refresh token. + + Returns: + dict[str, Token]: The access and refresh tokens from the server. + """ + self.access_token = access_token + # Verify the tokens by making a request to the user endpoint + try: + self._make_request("GET", "user") # type: ignore + return { + "access_token": Token( + token=access_token, token_type="access_token" + ), + } + except Exception: + # If the request fails, clear the tokens and raise an exception + self.access_token = None + self._refresh_token = None + raise ValueError("Invalid tokens provided") + + @deprecated("") + def get_user_verification_code(self, user_id: Union[str, UUID]) -> dict: + """ + Retrieves only the verification code for a specific user. Requires superuser access. + + Args: + user_id (Union[str, UUID]): The ID of the user to get verification code for. + + Returns: + dict: Contains verification code and its expiry date + """ + return self._make_request( # type: ignore + "GET", f"user/{user_id}/verification_data" + ) + + @deprecated("") + def send_reset_email(self, email: str) -> dict: + """ + Generates a new verification code and sends a reset email to the user. + + Args: + email (str): The email address of the user to send the reset email to. + + Returns: + dict: Contains verification code and message from the server. 
+ """ + return self._make_request( # type: ignore + "POST", "send_reset_email", json=email + ) diff --git a/py/sdk/v2/sync_ingestion.py b/py/sdk/v2/sync_ingestion.py new file mode 100644 index 000000000..c0d5c090a --- /dev/null +++ b/py/sdk/v2/sync_ingestion.py @@ -0,0 +1,358 @@ +from __future__ import annotations # for Python 3.10+ + +import json +import os +from contextlib import ExitStack +from typing import Optional, Union +from uuid import UUID + +from typing_extensions import deprecated + +from shared.abstractions import IndexMeasure, IndexMethod, VectorTableName + + +class SyncIngestionMixins: + @deprecated("Use client.documents.create() instead") + def ingest_files( + self, + file_paths: list[str], + document_ids: Optional[list[Union[str, UUID]]] = None, + metadatas: Optional[list[dict]] = None, + ingestion_config: Optional[dict] = None, + collection_ids: Optional[list[list[Union[str, UUID]]]] = None, + run_with_orchestration: Optional[bool] = None, + ) -> dict: + """ + Ingest files into your R2R deployment + + Args: + file_paths (List[str]): List of file paths to ingest. + document_ids (Optional[List[str]]): List of document IDs. + metadatas (Optional[List[dict]]): List of metadata dictionaries for each file. + ingestion_config (Optional[Union[dict]]): Custom chunking configuration. + + Returns: + dict: Ingestion results containing processed, failed, and skipped documents. + """ + if document_ids is not None and len(file_paths) != len(document_ids): + raise ValueError( + "Number of file paths must match number of document IDs." + ) + if metadatas is not None and len(file_paths) != len(metadatas): + raise ValueError( + "Number of metadatas must match number of document IDs." + ) + + with ExitStack() as stack: + all_file_paths: list[str] = [] + for path in file_paths: + if os.path.isdir(path): + for root, _, files in os.walk(path): + all_file_paths.extend( + os.path.join(root, file) for file in files + ) + else: + all_file_paths.append(path) + + with ExitStack() as stack: + files_tuples = [ + ( + "files", + ( + os.path.basename(file), + stack.enter_context(open(file, "rb")), + "application/octet-stream", + ), + ) + for file in all_file_paths + ] + + data = {} + if document_ids: + data["document_ids"] = json.dumps( + [str(doc_id) for doc_id in document_ids] + ) + if metadatas: + data["metadatas"] = json.dumps(metadatas) + + if ingestion_config: + data["ingestion_config"] = json.dumps(ingestion_config) + + if run_with_orchestration is not None: + data["run_with_orchestration"] = str( + run_with_orchestration + ) + + if collection_ids: + data["collection_ids"] = json.dumps( + [ + [ + str(collection_id) + for collection_id in doc_collection_ids + ] + for doc_collection_ids in collection_ids + ] + ) + + return self._make_request( # type: ignore + "POST", "ingest_files", data=data, files=files_tuples + ) + + @deprecated("Use client.documents.update() instead") + def update_files( + self, + file_paths: list[str], + document_ids: Optional[list[Union[str, UUID]]] = None, + metadatas: Optional[list[dict]] = None, + ingestion_config: Optional[dict] = None, + collection_ids: Optional[list[list[Union[str, UUID]]]] = None, + run_with_orchestration: Optional[bool] = None, + ) -> dict: + """ + Update existing files in your R2R deployment. + + Args: + file_paths (List[str]): List of file paths to update. + document_ids (List[str]): List of document IDs to update. + metadatas (Optional[List[dict]]): List of updated metadata dictionaries for each file. 
+ ingestion_config (Optional[Union[dict]]): Custom chunking configuration. + + Returns: + dict: Update results containing processed, failed, and skipped documents. + """ + if document_ids is not None and len(file_paths) != len(document_ids): + raise ValueError( + "Number of file paths must match number of document IDs." + ) + if metadatas is not None and len(file_paths) != len(metadatas): + raise ValueError( + "Number of file paths must match number of document IDs." + ) + + with ExitStack() as stack: + files = [ + ( + "files", + ( + os.path.basename(file), + stack.enter_context(open(file, "rb")), + "application/octet-stream", + ), + ) + for file in file_paths + ] + + data = {} + if document_ids: + data["document_ids"] = json.dumps( + [str(doc_id) for doc_id in document_ids] + ) + if metadatas: + data["metadatas"] = json.dumps(metadatas) + if ingestion_config: + data["ingestion_config"] = json.dumps(ingestion_config) + + if run_with_orchestration is not None: + data["run_with_orchestration"] = str(run_with_orchestration) + + if collection_ids: + data["collection_ids"] = json.dumps( + [ + [ + str(collection_id) + for collection_id in doc_collection_ids + ] + for doc_collection_ids in collection_ids + ] + ) + return self._make_request( # type: ignore + "POST", "update_files", data=data, files=files + ) + + @deprecated("Use client.chunks.create() instead") + def ingest_chunks( + self, + chunks: list[dict], + document_id: Optional[UUID] = None, + metadata: Optional[dict] = None, + collection_ids: Optional[list[list[Union[str, UUID]]]] = None, + run_with_orchestration: Optional[bool] = None, + ) -> dict: + """ + Ingest files into your R2R deployment + + Args: + chunks (List[dict]): List of dictionaries containing chunk data. + document_id (Optional[UUID]): The ID of the document to ingest chunks into. + metadata (Optional[dict]): Metadata dictionary for the document + + Returns: + dict: Ingestion results containing processed, failed, and skipped documents. + """ + + data = { + "chunks": chunks, + "document_id": document_id, + "metadata": metadata, + } + if run_with_orchestration is not None: + data["run_with_orchestration"] = str(run_with_orchestration) # type: ignore + + if collection_ids: + data["collection_ids"] = json.dumps( # type: ignore + [ + [ + str(collection_id) + for collection_id in doc_collection_ids + ] + for doc_collection_ids in collection_ids + ] + ) + + return self._make_request("POST", "ingest_chunks", json=data) # type: ignore + + @deprecated("Use client.chunks.update() instead") + def update_chunks( + self, + document_id: UUID, + chunk_id: UUID, + text: str, + metadata: Optional[dict] = None, + run_with_orchestration: Optional[bool] = None, + ) -> dict: + """ + Update the content of an existing chunk. + + Args: + document_id (UUID): The ID of the document containing the chunk. + chunk_id (UUID): The ID of the chunk to update. + text (str): The new text content of the chunk. + metadata (Optional[dict]): Metadata dictionary for the chunk. + run_with_orchestration (Optional[bool]): Whether to run the update through orchestration. + + Returns: + dict: Update results containing processed, failed, and skipped documents. 
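+
+        Example (illustrative; assumes `document_id` and `chunk_id` refer to an existing chunk):
+            >>> client.update_chunks(document_id, chunk_id, text="Revised chunk text.")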
+ """ + + data = { + "text": text, + "metadata": metadata, + "run_with_orchestration": run_with_orchestration, + } + + # Remove None values from payload + data = {k: v for k, v in data.items() if v is not None} + + return self._make_request("PUT", f"update_chunk/{document_id}/{chunk_id}", json=data) # type: ignore + + @deprecated("Use client.indices.create() instead") + def create_vector_index( + self, + table_name: VectorTableName = VectorTableName.CHUNKS, + index_method: IndexMethod = IndexMethod.hnsw, + index_measure: IndexMeasure = IndexMeasure.cosine_distance, + index_arguments: Optional[dict] = None, + index_name: Optional[str] = None, + index_column: Optional[list[str]] = None, + concurrently: bool = True, + ) -> dict: + """ + Create a vector index for a given table. + + Args: + table_name (VectorTableName): Name of the table to create index on + index_method (IndexMethod): Method to use for indexing (hnsw or ivf_flat) + index_measure (IndexMeasure): Distance measure to use + index_arguments (Optional[dict]): Additional arguments for the index + index_name (Optional[str]): Custom name for the index + concurrently (bool): Whether to create the index concurrently + + Returns: + dict: Response containing the creation status + """ + data = { + "table_name": table_name, + "index_method": index_method, + "index_measure": index_measure, + "index_arguments": index_arguments, + "index_name": index_name, + "index_column": index_column, + "concurrently": concurrently, + } + return self._make_request( # type: ignore + "POST", "create_vector_index", json=data + ) + + @deprecated("Use client.indices.list() instead") + def list_vector_indices( + self, + table_name: VectorTableName = VectorTableName.CHUNKS, + ) -> dict: + """ + List all vector indices for a given table. + + Args: + table_name (VectorTableName): Name of the table to list indices from + + Returns: + dict: Response containing the list of indices + """ + params = {"table_name": table_name} + return self._make_request( # type: ignore + "GET", "list_vector_indices", params=params + ) + + @deprecated("Use client.indices.delete() instead") + def delete_vector_index( + self, + index_name: str, + table_name: VectorTableName = VectorTableName.CHUNKS, + concurrently: bool = True, + ) -> dict: + """ + Delete a vector index from a given table. + + Args: + index_name (str): Name of the index to delete + table_name (VectorTableName): Name of the table containing the index + concurrently (bool): Whether to delete the index concurrently + + Returns: + dict: Response containing the deletion status + """ + data = { + "index_name": index_name, + "table_name": table_name, + "concurrently": concurrently, + } + return self._make_request( # type: ignore + "DELETE", "delete_vector_index", json=data + ) + + @deprecated("Use client.documents.update() instead") + def update_document_metadata( + self, + document_id: Union[str, UUID], + metadata: dict, + ) -> dict: + """ + Update the metadata of an existing document. + + Args: + document_id (Union[str, UUID]): The ID of the document to update. + metadata (dict): The new metadata to merge with existing metadata. + run_with_orchestration (Optional[bool]): Whether to run the update through orchestration. + + Returns: + dict: Update results containing the status of the metadata update. 
+ """ + data = { + "metadata": metadata, + } + + # Remove None values from payload + data = {k: v for k, v in data.items() if v is not None} + + return self._make_request( # type: ignore + "POST", f"update_document_metadata/{document_id}", json=metadata + ) diff --git a/py/sdk/v2/sync_kg.py b/py/sdk/v2/sync_kg.py new file mode 100644 index 000000000..b8c0527ff --- /dev/null +++ b/py/sdk/v2/sync_kg.py @@ -0,0 +1,257 @@ +from typing import Optional, Union +from uuid import UUID + +from ..models import ( + KGCreationSettings, + KGEnrichmentSettings, + KGEntityDeduplicationSettings, + KGRunType, +) + + +class SyncKGMixins: + def create_graph( + self, + collection_id: Optional[Union[UUID, str]] = None, + run_type: Optional[Union[str, KGRunType]] = None, + kg_creation_settings: Optional[Union[dict, KGCreationSettings]] = None, + run_with_orchestration: Optional[bool] = None, + ) -> dict: + """ + Create a graph from the given settings. + + Args: + collection_id (Optional[Union[UUID, str]]): The ID of the collection to create the graph for. + run_type (Optional[Union[str, KGRunType]]): The type of run to perform. + kg_creation_settings (Optional[Union[dict, KGCreationSettings]]): Settings for the graph creation process. + """ + if isinstance(kg_creation_settings, KGCreationSettings): + kg_creation_settings = kg_creation_settings.model_dump() + + data = { + "collection_id": str(collection_id) if collection_id else None, + "run_type": str(run_type) if run_type else None, + "kg_creation_settings": kg_creation_settings or {}, + "run_with_orchestration": run_with_orchestration or True, + } + + return self._make_request("POST", "create_graph", json=data) # type: ignore + + def enrich_graph( + self, + collection_id: Optional[Union[UUID, str]] = None, + run_type: Optional[Union[str, KGRunType]] = None, + kg_enrichment_settings: Optional[ + Union[dict, KGEnrichmentSettings] + ] = None, + run_with_orchestration: Optional[bool] = None, + ) -> dict: + """ + Perform graph enrichment over the entire graph. + + Args: + collection_id (Optional[Union[UUID, str]]): The ID of the collection to enrich the graph for. + run_type (Optional[Union[str, KGRunType]]): The type of run to perform. + kg_enrichment_settings (Optional[Union[dict, KGEnrichmentSettings]]): Settings for the graph enrichment process. + Returns: + Results of the graph enrichment process. + """ + if isinstance(kg_enrichment_settings, KGEnrichmentSettings): + kg_enrichment_settings = kg_enrichment_settings.model_dump() + + data = { + "collection_id": str(collection_id) if collection_id else None, + "run_type": str(run_type) if run_type else None, + "kg_enrichment_settings": kg_enrichment_settings or {}, + "run_with_orchestration": run_with_orchestration or True, + } + + return self._make_request("POST", "enrich_graph", json=data) # type: ignore + + def get_entities( + self, + collection_id: Optional[Union[UUID, str]] = None, + entity_level: Optional[str] = None, + entity_ids: Optional[list[str]] = None, + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + Retrieve entities from the knowledge graph. + + Args: + collection_id (str): The ID of the collection to retrieve entities from. + offset (int): The offset for pagination. + limit (int): The limit for pagination. + entity_level (Optional[str]): The level of entity to filter by. + entity_ids (Optional[List[str]]): Optional list of entity IDs to filter by. + + Returns: + dict: A dictionary containing the retrieved entities and total count. 
+ """ + + params = { + "collection_id": collection_id, + "entity_level": entity_level, + "entity_ids": entity_ids, + "offset": offset, + "limit": limit, + } + + params = {k: v for k, v in params.items() if v is not None} + + return self._make_request("GET", "entities", params=params) # type: ignore + + def get_triples( + self, + collection_id: Optional[Union[UUID, str]] = None, + entity_names: Optional[list[str]] = None, + triple_ids: Optional[list[str]] = None, + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + Retrieve triples from the knowledge graph. + + Args: + collection_id (str): The ID of the collection to retrieve triples from. + offset (int): The offset for pagination. + limit (int): The limit for pagination. + entity_names (Optional[List[str]]): Optional list of entity names to filter by. + triple_ids (Optional[List[str]]): Optional list of triple IDs to filter by. + + Returns: + dict: A dictionary containing the retrieved triples and total count. + """ + + params = { + "collection_id": collection_id, + "entity_names": entity_names, + "triple_ids": triple_ids, + "offset": offset, + "limit": limit, + } + + params = {k: v for k, v in params.items() if v is not None} + + return self._make_request("GET", "triples", params=params) # type: ignore + + def get_communities( + self, + collection_id: Optional[Union[UUID, str]] = None, + levels: Optional[list[int]] = None, + community_ids: Optional[list[UUID]] = None, + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + Retrieve communities from the knowledge graph. + + Args: + collection_id (str): The ID of the collection to retrieve communities from. + offset (int): The offset for pagination. + limit (int): The limit for pagination. + levels (Optional[List[int]]): Optional list of levels to filter by. + community_ids (Optional[List[int]]): Optional list of community numbers to filter by. + + Returns: + dict: A dictionary containing the retrieved communities. + """ + + params = { + "collection_id": collection_id, + "levels": levels, + "community_ids": community_ids, + "offset": offset, + "limit": limit, + } + + params = {k: v for k, v in params.items() if v is not None} + + return self._make_request("GET", "communities", params=params) # type: ignore + + def get_tuned_prompt( + self, + prompt_name: str, + collection_id: Optional[str] = None, + documents_offset: Optional[int] = 0, + documents_limit: Optional[int] = 100, + chunk_offset: Optional[int] = 0, + chunk_limit: Optional[int] = 100, + ) -> dict: + """ + Tune the GraphRAG prompt for a given collection. + + The tuning process provides an LLM with chunks from each document in the collection. The relative sample size can therefore be controlled by adjusting the document and chunk limits. + + Args: + prompt_name (str): The name of the prompt to tune. + collection_id (str): The ID of the collection to tune the prompt for. + documents_offset (Optional[int]): The offset for pagination of documents. + documents_limit (Optional[int]): The limit for pagination of documents. + chunk_offset (Optional[int]): The offset for pagination of chunks. + chunk_limit (Optional[int]): The limit for pagination of chunks. + + Returns: + dict: A dictionary containing the tuned prompt. 
+ """ + params = { + "prompt_name": prompt_name, + "collection_id": collection_id, + "documents_offset": documents_offset, + "documents_limit": documents_limit, + "chunk_offset": chunk_offset, + "chunk_limit": chunk_limit, + } + + params = {k: v for k, v in params.items() if v is not None} + + return self._make_request("GET", "tuned_prompt", params=params) # type: ignore + + def deduplicate_entities( + self, + collection_id: Optional[Union[UUID, str]] = None, + run_type: Optional[Union[str, KGRunType]] = None, + deduplication_settings: Optional[ + Union[dict, KGEntityDeduplicationSettings] + ] = None, + ): + """ + Deduplicate entities in the knowledge graph. + Args: + collection_id (Optional[Union[UUID, str]]): The ID of the collection to deduplicate entities for. + run_type (Optional[Union[str, KGRunType]]): The type of run to perform. + deduplication_settings (Optional[Union[dict, KGEntityDeduplicationSettings]]): Settings for the deduplication process. + """ + if isinstance(deduplication_settings, KGEntityDeduplicationSettings): + deduplication_settings = deduplication_settings.model_dump() + + data = { + "collection_id": str(collection_id) if collection_id else None, + "run_type": str(run_type) if run_type else None, + "deduplication_settings": deduplication_settings or {}, + } + + return self._make_request( # type: ignore + "POST", "deduplicate_entities", json=data + ) + + def delete_graph_for_collection( + self, collection_id: Union[UUID, str], cascade: bool = False + ) -> dict: + """ + Delete the graph for a given collection. + + Args: + collection_id (Union[UUID, str]): The ID of the collection to delete the graph for. + cascade (bool): Whether to cascade the deletion, and delete entities and triples belonging to the collection. + + NOTE: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection. + """ + + data = { + "collection_id": str(collection_id), + "cascade": cascade, + } + + return self._make_request("DELETE", "delete_graph_for_collection", json=data) # type: ignore diff --git a/py/sdk/v2/sync_management.py b/py/sdk/v2/sync_management.py new file mode 100644 index 000000000..0374e33bd --- /dev/null +++ b/py/sdk/v2/sync_management.py @@ -0,0 +1,833 @@ +from __future__ import annotations # for Python 3.10+ + +import json +from typing import Any, Optional, Union +from uuid import UUID + +from typing_extensions import deprecated + +from ..models import Message + + +class SyncManagementMixins: + @deprecated("Use client.prompts.update() instead") + def update_prompt( + self, + name: str, + template: Optional[str] = None, + input_types: Optional[dict[str, str]] = None, + ) -> dict: + """ + Update a prompt in the database. + + Args: + name (str): The name of the prompt to update. + template (Optional[str]): The new template for the prompt. + input_types (Optional[dict[str, str]]): The new input types for the prompt. + + Returns: + dict: The response from the server. 
+ """ + data: dict = {"name": name} + if template is not None: + data["template"] = template + if input_types is not None: + data["input_types"] = input_types + + return self._make_request("POST", "update_prompt", json=data) # type: ignore + + @deprecated("Use client.prompts.create() instead") + def add_prompt( + self, + name: str, + template: str, + input_types: dict[str, str], + ) -> dict: + """ + Add a new prompt to the system. + + Args: + name (str): The name of the prompt. + template (str): The template for the prompt. + input_types (dict[str, str]): The input types for the prompt. + + Returns: + dict: The response from the server. + """ + data = { + "name": name, + "template": template, + "input_types": input_types, + } + return self._make_request("POST", "add_prompt", json=data) # type: ignore + + @deprecated("Use client.prompts.retrieve() instead") + def get_prompt( + self, + prompt_name: str, + inputs: Optional[dict[str, Any]] = None, + prompt_override: Optional[str] = None, + ) -> dict: + """ + Get a prompt from the system. + + Args: + prompt_name (str): The name of the prompt to retrieve. + inputs (Optional[dict[str, Any]]): Optional inputs for the prompt. + prompt_override (Optional[str]): Optional override for the prompt template. + + Returns: + dict: The response from the server. + """ + params = {} + if inputs: + params["inputs"] = json.dumps(inputs) + if prompt_override: + params["prompt_override"] = prompt_override + return self._make_request( # type: ignore + "GET", f"get_prompt/{prompt_name}", params=params + ) + + @deprecated("Use client.prompts.list() instead") + def get_all_prompts(self) -> dict: + """ + Get all prompts from the system. + + Returns: + dict: The response from the server containing all prompts. + """ + return self._make_request("GET", "get_all_prompts") # type: ignore + + @deprecated("Use client.prompts.delete() instead") + def delete_prompt(self, prompt_name: str) -> dict: + """ + Delete a prompt from the system. + + Args: + prompt_name (str): The name of the prompt to delete. + + Returns: + dict: The response from the server. + """ + return self._make_request( # type: ignore + "DELETE", f"delete_prompt/{prompt_name}" + ) + + @deprecated( + "This method is deprecated. New, improved analytics features will be added in a future release." + ) + def analytics( + self, + filter_criteria: Optional[Union[dict, str]] = None, + analysis_types: Optional[Union[dict, str]] = None, + ) -> dict: + """ + Get analytics data from the server. + + Args: + filter_criteria (Optional[Union[dict, str]]): The filter criteria to use. + analysis_types (Optional[Union[dict, str]]): The types of analysis to perform. + + Returns: + dict: The analytics data from the server. + """ + params = {} + if filter_criteria: + if isinstance(filter_criteria, dict): + params["filter_criteria"] = json.dumps(filter_criteria) + else: + params["filter_criteria"] = filter_criteria + if analysis_types: + if isinstance(analysis_types, dict): + params["analysis_types"] = json.dumps(analysis_types) + else: + params["analysis_types"] = analysis_types + + return self._make_request("GET", "analytics", params=params) # type: ignore + + @deprecated("Use client.system.settings() instead") + def app_settings(self) -> dict: + """ + Get the configuration settings for the app. + + Returns: + dict: The app settings. 
+ """ + return self._make_request("GET", "app_settings") # type: ignore + + @deprecated("Use client.users.list() instead") + def users_overview( + self, + user_ids: Optional[list[str]] = None, + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + An overview of users in the R2R deployment. + + Args: + user_ids (Optional[list[str]]): List of user IDs to get an overview for. + + Returns: + dict: The overview of users in the system. + """ + params: dict = {} + if user_ids is not None: + params["user_ids"] = [str(uid) for uid in user_ids] + if offset is not None: + params["offset"] = offset + if limit is not None: + params["limit"] = limit + return self._make_request( # type: ignore + "GET", "users_overview", params=params + ) + + @deprecated("Use client..delete() instead") + def delete( + self, + filters: dict, + ) -> dict: + """ + Delete data from the database given a set of filters. + + Args: + filters (dict[str, str]): The filters to delete by. + + Returns: + dict: The results of the deletion. + """ + filters_json = json.dumps(filters) + + return self._make_request( # type: ignore + "DELETE", "delete", params={"filters": filters_json} + ) or {"results": {}} + + @deprecated("Use client.documents.download() instead") + def download_file( + self, + document_id: Union[str, UUID], + ): + """ + Download a file from the R2R deployment. + + Args: + document_id (str): The ID of the document to download. + + Returns: + dict: The response from the server. + """ + return self._make_request( # type: ignore + "GET", f"download_file/{str(document_id)}" + ) + + @deprecated("Use client.documents.list() instead") + def documents_overview( + self, + document_ids: Optional[list[Union[UUID, str]]] = None, + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + Get an overview of documents in the R2R deployment. + + Args: + document_ids (Optional[list[str]]): List of document IDs to get an overview for. + + Returns: + dict: The overview of documents in the system. + """ + params: dict = {} + document_ids = ( + [str(doc_id) for doc_id in document_ids] if document_ids else None + ) + if document_ids: + params["document_ids"] = document_ids + if offset is not None: + params["offset"] = offset + if limit is not None: + params["limit"] = limit + return self._make_request( # type: ignore + "GET", "documents_overview", params=params + ) + + @deprecated("Use client.documents.list_chunks() instead") + def list_document_chunks( + self, + document_id: str, + offset: Optional[int] = None, + limit: Optional[int] = None, + include_vectors: Optional[bool] = False, + ) -> dict: + """ + Get the chunks for a document. + + Args: + document_id (str): The ID of the document to get chunks for. + + Returns: + dict: The chunks for the document. + """ + params: dict = {} + if offset is not None: + params["offset"] = offset + if limit is not None: + params["limit"] = limit + if include_vectors: + params["include_vectors"] = include_vectors + if not params: + return self._make_request( # type: ignore + "GET", f"list_document_chunks/{document_id}" + ) + else: + return self._make_request( # type: ignore + "GET", f"list_document_chunks/{document_id}", params=params + ) + + @deprecated("Use client.collections.list() instead") + def collections_overview( + self, + collection_ids: Optional[list[str]] = None, + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + Get an overview of existing collections. 
+ + Args: + collection_ids (Optional[list[str]]): List of collection IDs to get an overview for. + limit (Optional[int]): The maximum number of collections to return. + offset (Optional[int]): The offset to start listing collections from. + + Returns: + dict: The overview of collections in the system. + """ + params: dict = {} + if collection_ids: + params["collection_ids"] = collection_ids + if offset: + params["offset"] = offset + if limit: + params["limit"] = limit + return self._make_request( # type: ignore + "GET", "collections_overview", params=params + ) + + @deprecated("Use client.collections.create() instead") + def create_collection( + self, + name: str, + description: Optional[str] = None, + ) -> dict: + """ + Create a new collection. + + Args: + name (str): The name of the collection. + description (Optional[str]): The description of the collection. + + Returns: + dict: The response from the server. + """ + data = {"name": name} + if description is not None: + data["description"] = description + + return self._make_request( # type: ignore + "POST", "create_collection", json=data + ) + + @deprecated("Use client.collections.retrieve() instead") + def get_collection( + self, + collection_id: Union[str, UUID], + ) -> dict: + """ + Get a collection by its ID. + + Args: + collection_id (str): The ID of the collection to get. + + Returns: + dict: The collection data. + """ + return self._make_request( # type: ignore + "GET", f"get_collection/{str(collection_id)}" + ) + + @deprecated("Use client.collections.update() instead") + def update_collection( + self, + collection_id: Union[str, UUID], + name: Optional[str] = None, + description: Optional[str] = None, + ) -> dict: + """ + Updates the name and description of a collection. + + Args: + collection_id (str): The ID of the collection to update. + name (Optional[str]): The new name for the collection. + description (Optional[str]): The new description of the collection. + + Returns: + dict: The response from the server. + """ + data = {"collection_id": str(collection_id)} + if name is not None: + data["name"] = name + if description is not None: + data["description"] = description + + return self._make_request( # type: ignore + "PUT", "update_collection", json=data + ) + + @deprecated("Use client.collections.delete() instead") + def delete_collection( + self, + collection_id: Union[str, UUID], + ) -> dict: + """ + Delete a collection by its ID. + + Args: + collection_id (str): The ID of the collection to delete. + + Returns: + dict: The response from the server. + """ + return self._make_request( # type: ignore + "DELETE", f"delete_collection/{str(collection_id)}" + ) + + @deprecated("Use client.users.delete() instead") + def delete_user( + self, + user_id: str, + password: Optional[str] = None, + delete_vector_data: bool = False, + ) -> dict: + """ + Delete a collection by its ID. + + Args: + collection_id (str): The ID of the collection to delete. + + Returns: + dict: The response from the server. 
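+
+        Example (illustrative; assumes `user_id` refers to an existing user):
+            >>> client.delete_user(user_id, password="current-password")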
+ """ + params: dict = {} + if password is not None: + params["password"] = password + if delete_vector_data: + params["delete_vector_data"] = delete_vector_data + if not params: + return self._make_request("DELETE", f"user/{user_id}") # type: ignore + else: + return self._make_request( # type: ignore + "DELETE", f"user/{user_id}", json=params + ) + + @deprecated("Use client.collections.list() instead") + def list_collections( + self, + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + List all collections in the R2R deployment. + + Args: + offset (Optional[int]): The offset to start listing collections from. + limit (Optional[int]): The maximum number of collections to return. + + Returns: + dict: The list of collections. + """ + params = {} + if offset is not None: + params["offset"] = offset + if limit is not None: + params["limit"] = limit + return self._make_request( # type: ignore + "GET", "list_collections", params=params + ) + + @deprecated("Use client.collections.add_user() instead") + def add_user_to_collection( + self, + user_id: Union[str, UUID], + collection_id: Union[str, UUID], + ) -> dict: + """ + Add a user to a collection. + + Args: + user_id (str): The ID of the user to add. + collection_id (str): The ID of the collection to add the user to. + + Returns: + dict: The response from the server. + """ + data = { + "user_id": str(user_id), + "collection_id": str(collection_id), + } + return self._make_request( # type: ignore + "POST", "add_user_to_collection", json=data + ) + + @deprecated("Use client.collections.remove_user() instead") + def remove_user_from_collection( + self, + user_id: Union[str, UUID], + collection_id: Union[str, UUID], + ) -> dict: + """ + Remove a user from a collection. + + Args: + user_id (str): The ID of the user to remove. + collection_id (str): The ID of the collection to remove the user from. + + Returns: + dict: The response from the server. + """ + data = { + "user_id": str(user_id), + "collection_id": str(collection_id), + } + return self._make_request( # type: ignore + "POST", "remove_user_from_collection", json=data + ) + + @deprecated("Use client.collections.list_users() instead") + def get_users_in_collection( + self, + collection_id: Union[str, UUID], + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + Get all users in a collection. + + Args: + collection_id (str): The ID of the collection to get users for. + offset (Optional[int]): The offset to start listing users from. + limit (Optional[int]): The maximum number of users to return. + + Returns: + dict: The list of users in the collection. + """ + params = {} + if offset is not None: + params["offset"] = offset + if limit is not None: + params["limit"] = limit + return self._make_request( # type: ignore + "GET", + f"get_users_in_collection/{str(collection_id)}", + params=params, + ) + + @deprecated("Use client.users.list_collections() instead") + def user_collections( + self, + user_id: Union[str, UUID], + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + Get all collections that a user is a member of. + + Args: + user_id (str): The ID of the user to get collections for. + + Returns: + dict: The list of collections that the user is a member of. 
+ """ + params = {} + if offset is not None: + params["offset"] = offset + if limit is not None: + params["limit"] = limit + if not params: + return self._make_request( # type: ignore + "GET", f"user_collections/{str(user_id)}" + ) + else: + return self._make_request( # type: ignore + "GET", f"user_collections/{str(user_id)}", params=params + ) + + @deprecated("Use client.collections.add_document() instead") + def assign_document_to_collection( + self, + document_id: Union[str, UUID], + collection_id: Union[str, UUID], + ) -> dict: + """ + Assign a document to a collection. + + Args: + document_id (str): The ID of the document to assign. + collection_id (str): The ID of the collection to assign the document to. + + Returns: + dict: The response from the server. + """ + data = { + "document_id": str(document_id), + "collection_id": str(collection_id), + } + return self._make_request( # type: ignore + "POST", "assign_document_to_collection", json=data + ) + + # TODO: Verify that this method is implemented, also, should be a PUT request + @deprecated("Use client.collections.remove_document() instead") + def remove_document_from_collection( + self, + document_id: Union[str, UUID], + collection_id: Union[str, UUID], + ) -> dict: + """ + Remove a document from a collection. + + Args: + document_id (str): The ID of the document to remove. + collection_id (str): The ID of the collection to remove the document from. + + Returns: + dict: The response from the server. + """ + data = { + "document_id": str(document_id), + "collection_id": str(collection_id), + } + return self._make_request( # type: ignore + "POST", "remove_document_from_collection", json=data + ) + + @deprecated("Use client.documents.list_collections() instead") + def document_collections( + self, + document_id: Union[str, UUID], + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + Get all collections that a document is assigned to. + + Args: + document_id (str): The ID of the document to get collections for. + + Returns: + dict: The list of collections that the document is assigned to. + """ + params = {} + if offset is not None: + params["offset"] = offset + if limit is not None: + params["limit"] = limit + if not params: + return self._make_request( # type: ignore + "GET", + f"document_collections/{str(document_id)}", + params=params, + ) + else: + return self._make_request( # type: ignore + "GET", f"document_collections/{str(document_id)}" + ) + + @deprecated("Use client.collections.list_documents() instead") + def documents_in_collection( + self, + collection_id: Union[str, UUID], + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + Get all documents in a collection. + + Args: + collection_id (str): The ID of the collection to get documents for. + offset (Optional[int]): The offset to start listing documents from. + limit (Optional[int]): The maximum number of documents to return. + + Returns: + dict: The list of documents in the collection. + """ + params = {} + if offset is not None: + params["offset"] = offset + if limit is not None: + params["limit"] = limit + return self._make_request( # type: ignore + "GET", f"collection/{str(collection_id)}/documents", params=params + ) + + @deprecated("Use client.conversations.list() instead") + def conversations_overview( + self, + conversation_ids: Optional[list[Union[UUID, str]]] = None, + offset: Optional[int] = None, + limit: Optional[int] = None, + ) -> dict: + """ + Get an overview of existing conversations. 
+ + Args: + conversation_ids (Optional[list[Union[UUID, str]]]): list of conversation IDs to retrieve. + offset (Optional[int]): The offset to start listing conversations from. + limit (Optional[int]): The maximum number of conversations to return. + + Returns: + dict[str, any]: The overview of conversations in the system. + """ + params: dict = {} + if conversation_ids: + params["conversation_ids"] = [str(cid) for cid in conversation_ids] + if offset is not None: + params["offset"] = offset + if limit is not None: + params["limit"] = limit + return self._make_request( # type: ignore + "GET", "conversations_overview", params=params + ) + + @deprecated("Use client.conversations.retrieve() instead") + def get_conversation( + self, + conversation_id: Union[str, UUID], + branch_id: Optional[str] = None, + ) -> dict: + """ + Get a conversation by its ID. + + Args: + conversation_id (Union[str, UUID]): The ID of the conversation to retrieve. + branch_id (Optional[str]): The ID of a specific branch to retrieve. + + Returns: + dict: The conversation data. + """ + query_params = f"?branch_id={branch_id}" if branch_id else "" + return self._make_request( # type: ignore + "GET", f"get_conversation/{str(conversation_id)}{query_params}" + ) + + @deprecated("Use client.conversations.create() instead") + def create_conversation(self) -> dict: + """ + Create a new conversation. + + Returns: + dict: The response from the server. + """ + return self._make_request("POST", "create_conversation") # type: ignore + + @deprecated("Use client.conversations.add_message() instead") + def add_message( + self, + conversation_id: Union[str, UUID], + message: Message, + parent_id: Optional[str] = None, + metadata: Optional[dict[str, Any]] = None, + ) -> dict: + """ + Add a message to an existing conversation. + + Args: + conversation_id (Union[str, UUID]): The ID of the conversation. + message (Message): The message to add. + parent_id (Optional[str]): The ID of the parent message. + metadata (Optional[dict[str, Any]]): Additional metadata for the message. + + Returns: + dict: The response from the server. + """ + data: dict = {"message": message} + if parent_id is not None: + data["parent_id"] = parent_id + if metadata is not None: + data["metadata"] = metadata + return self._make_request( # type: ignore + "POST", f"add_message/{str(conversation_id)}", data=data + ) + + @deprecated("Use client.conversations.update_message() instead") + def update_message( + self, + message_id: str, + message: Message, + ) -> dict: + """ + Update a message in an existing conversation. + + Args: + message_id (str): The ID of the message to update. + message (Message): The updated message. + + Returns: + dict: The response from the server. + """ + return self._make_request( # type: ignore + "PUT", f"update_message/{message_id}", data=message + ) + + def update_message_metadata( + self, + message_id: str, + metadata: dict[str, Any], + ) -> dict: + """ + Update the metadata of a message. + + Args: + message_id (str): The ID of the message to update. + metadata (dict[str, Any]): The metadata to update. + + Returns: + dict: The response from the server. + """ + return self._make_request( # type: ignore + "PATCH", f"messages/{message_id}/metadata", data=metadata + ) + + @deprecated("Use client.conversations.list_branches() instead") + def branches_overview( + self, + conversation_id: Union[str, UUID], + ) -> dict: + """ + Get an overview of branches in a conversation. 
+ + Args: + conversation_id (Union[str, UUID]): The ID of the conversation to get branches for. + + Returns: + dict: The response from the server. + """ + return self._make_request( # type: ignore + "GET", f"branches_overview/{str(conversation_id)}" + ) + + @deprecated("Use client.conversations.delete() instead") + def delete_conversation( + self, + conversation_id: Union[str, UUID], + ) -> dict: + """ + Delete a conversation by its ID. + + Args: + conversation_id (Union[str, UUID]): The ID of the conversation to delete. + + Returns: + dict: The response from the server. + """ + return self._make_request( # type: ignore + "DELETE", f"delete_conversation/{str(conversation_id)}" + ) diff --git a/py/sdk/v2/sync_retrieval.py b/py/sdk/v2/sync_retrieval.py new file mode 100644 index 000000000..a9a6c7b3f --- /dev/null +++ b/py/sdk/v2/sync_retrieval.py @@ -0,0 +1,211 @@ +from __future__ import annotations # for Python 3.10+ + +import logging +from typing import AsyncGenerator, Optional + +from typing_extensions import deprecated + +from ..models import GenerationConfig, Message, RAGResponse, SearchSettings + +logger = logging.getLogger() + + +class SyncRetrievalMixins: + def search_documents( + self, + query: str, + search_settings: Optional[dict | SearchSettings] = None, + ): + """ + Conduct a vector and/or KG search. + + Args: + query (str): The query to search for. + search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. + + Returns: + SearchResponse: The search response. + """ + if search_settings and not isinstance(search_settings, dict): + search_settings = search_settings.model_dump() + + data = { + "query": query, + "search_settings": search_settings, + } + return self._make_request("POST", "search_documents", json=data) # type: ignore + + @deprecated("Use client.retrieval.search() instead") + def search( + self, + query: str, + search_settings: Optional[dict | SearchSettings] = None, + ): + """ + Conduct a vector and/or KG search. + + Args: + query (str): The query to search for. + search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. + + Returns: + CombinedSearchResponse: The search response. + """ + if search_settings and not isinstance(search_settings, dict): + search_settings = search_settings.model_dump() + + data = { + "query": query, + "search_settings": search_settings, + } + return self._make_request("POST", "search", json=data) # type: ignore + + @deprecated("Use client.retrieval.completion() instead") + def completion( + self, + messages: list[dict | Message], + generation_config: Optional[dict | GenerationConfig] = None, + ): + cast_messages: list[Message] = [ + Message(**msg) if isinstance(msg, dict) else msg + for msg in messages + ] + + if generation_config and not isinstance(generation_config, dict): + generation_config = generation_config.model_dump() + + data = { + "messages": [msg.model_dump() for msg in cast_messages], + "generation_config": generation_config, + } + + return self._make_request("POST", "completion", json=data) # type: ignore + + @deprecated("Use client.retrieval.rag() instead") + def rag( + self, + query: str, + rag_generation_config: Optional[dict | GenerationConfig] = None, + search_settings: Optional[dict | SearchSettings] = None, + task_prompt_override: Optional[str] = None, + include_title_if_available: Optional[bool] = False, + ) -> RAGResponse | AsyncGenerator[RAGResponse, None]: + """ + Conducts a Retrieval Augmented Generation (RAG) search with the given query. 
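+
+        Example (illustrative; assumes a configured synchronous client with ingested documents):
+            >>> response = client.rag("What are the key findings in the uploaded reports?")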
+ + Args: + query (str): The query to search for. + rag_generation_config (Optional[Union[dict, GenerationConfig]]): RAG generation configuration. + search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. + task_prompt_override (Optional[str]): Task prompt override. + include_title_if_available (Optional[bool]): Include the title if available. + + Returns: + Union[RAGResponse, AsyncGenerator[RAGResponse, None]]: The RAG response + """ + if rag_generation_config and not isinstance( + rag_generation_config, dict + ): + rag_generation_config = rag_generation_config.model_dump() + if search_settings and not isinstance(search_settings, dict): + search_settings = search_settings.model_dump() + + data = { + "query": query, + "rag_generation_config": rag_generation_config, + "search_settings": search_settings, + "task_prompt_override": task_prompt_override, + "include_title_if_available": include_title_if_available, + } + + if rag_generation_config and rag_generation_config.get( # type: ignore + "stream", False + ): + return self._make_streaming_request("POST", "rag", json=data) # type: ignore + else: + return self._make_request("POST", "rag", json=data) # type: ignore + + @deprecated("Use client.retrieval.agent() instead") + def agent( + self, + message: Optional[dict | Message] = None, + rag_generation_config: Optional[dict | GenerationConfig] = None, + search_settings: Optional[dict | SearchSettings] = None, + task_prompt_override: Optional[str] = None, + include_title_if_available: Optional[bool] = False, + conversation_id: Optional[str] = None, + branch_id: Optional[str] = None, + # TODO - Deprecate messages + messages: Optional[dict | Message] = None, + ) -> list[Message] | AsyncGenerator[Message, None]: + """ + Performs a single turn in a conversation with a RAG agent. + + Args: + messages (List[Union[dict, Message]]): The messages to send to the agent. + rag_generation_config (Optional[Union[dict, GenerationConfig]]): RAG generation configuration. + chunk_search_settings (Optional[Union[dict, SearchSettings]]): Vector search settings. + graph_search_settings (Optional[Union[dict, GraphSearchSettings]]): KG search settings. + task_prompt_override (Optional[str]): Task prompt override. + include_title_if_available (Optional[bool]): Include the title if available. + + Returns: + Union[List[Message], AsyncGenerator[Message, None]]: The agent response. + """ + if messages: + logger.warning( + "The `messages` argument is deprecated. Please use `message` instead." 
+ ) + if rag_generation_config and not isinstance( + rag_generation_config, dict + ): + rag_generation_config = rag_generation_config.model_dump() + if search_settings and not isinstance(search_settings, dict): + search_settings = search_settings.model_dump() + + data = { + "rag_generation_config": rag_generation_config or {}, + "search_settings": search_settings or {}, + "task_prompt_override": task_prompt_override, + "include_title_if_available": include_title_if_available, + "conversation_id": conversation_id, + "branch_id": branch_id, + } + + if message: + cast_message: Message = ( + Message(**message) if isinstance(message, dict) else message + ) + data["message"] = cast_message.model_dump() + + if messages: + data["messages"] = [ + ( + Message(**msg).model_dump() # type: ignore + if isinstance(msg, dict) + else msg.model_dump() # type: ignore + ) + for msg in messages + ] + + if rag_generation_config and rag_generation_config.get( # type: ignore + "stream", False + ): + return self._make_streaming_request("POST", "agent", json=data) # type: ignore + else: + return self._make_request("POST", "agent", json=data) # type: ignore + + def embedding( + self, + content: str, + ) -> list[float]: + """ + Generate embeddings for the provided content. + + Args: + content (str): The text content to embed. + + Returns: + list[float]: The generated embedding vector. + """ + return self._make_request("POST", "embedding", json=content) # type: ignore diff --git a/py/sdk/v2/sync_server.py b/py/sdk/v2/sync_server.py new file mode 100644 index 000000000..5106d70da --- /dev/null +++ b/py/sdk/v2/sync_server.py @@ -0,0 +1,48 @@ +from __future__ import annotations # for Python 3.10+ + +from typing import Optional + +from typing_extensions import deprecated + + +class SyncServerMixins: + def health(self) -> dict: + return self._make_request("GET", "health") # type: ignore + + def server_stats(self) -> dict: + """ + Get statistics about the server, including the start time, uptime, CPU usage, and memory usage. + + Returns: + dict: The server statistics. + """ + return self._make_request("GET", "server_stats") # type: ignore + + @deprecated("Use client.system.logs() instead") + def logs( + self, + offset: Optional[int] = None, + limit: Optional[int] = None, + run_type_filter: Optional[str] = None, + ) -> dict: + """ + Get logs from the server. + + Args: + offset (Optional[int]): The offset to start from. + limit (Optional[int]): The maximum number of logs to return. + run_type_filter (Optional[str]): The run type to filter by. + + Returns: + dict: The logs from the server. 
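+
+        Example (illustrative):
+            >>> client.logs(offset=0, limit=100)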
+ """ + params = { + key: value + for key, value in { + "offset": offset, + "limit": limit, + "run_type_filter": run_type_filter, + }.items() + if value is not None + } + return self._make_request("GET", "logs", params=params) # type: ignore diff --git a/py/sdk/v3/__init__.py b/py/sdk/v3/__init__.py new file mode 100644 index 000000000..bda8063cc --- /dev/null +++ b/py/sdk/v3/__init__.py @@ -0,0 +1,23 @@ +from .chunks import * +from .collections import * +from .conversations import * +from .documents import * +from .graphs import * +from .indices import * +from .prompts import * +from .retrieval import * +from .system import * +from .users import * + +__all__ = [ + "Chunks", + "Collections", + "Conversations", + "Documents", + "Graphs", + "Indices", + "Prompts", + "Retrieval", + "System", + "Users", +] diff --git a/py/sdk/v3/chunks.py b/py/sdk/v3/chunks.py new file mode 100644 index 000000000..48700968d --- /dev/null +++ b/py/sdk/v3/chunks.py @@ -0,0 +1,208 @@ +import json +from typing import Optional +from uuid import UUID + +from shared.api.models.base import WrappedBooleanResponse +from shared.api.models.management.responses import ( + WrappedChunkResponse, + WrappedChunksResponse, +) + +from ..models import SearchSettings + + +class ChunksSDK: + """ + SDK for interacting with chunks in the v3 API. + """ + + def __init__(self, client): + self.client = client + + async def create( + self, + chunks: list[dict], + run_with_orchestration: Optional[bool] = True, + ) -> list[dict]: + """ + Create multiple chunks. + + Args: + chunks: List of UnprocessedChunk objects containing: + - id: Optional[UUID] + - document_id: Optional[UUID] + - collection_ids: list[UUID] + - metadata: dict + - text: str + run_with_orchestration: Whether to run the chunks through orchestration + + Returns: + list[dict]: List of creation results containing processed chunk information + """ + data = { + "chunks": chunks, + "run_with_orchestration": run_with_orchestration, + } + return await self.client._make_request( + "POST", + "chunks", + json=data, + version="v3", + ) + + async def update( + self, + chunk: dict[str, str], + ) -> WrappedChunkResponse: + """ + Update an existing chunk. + + Args: + chunk (dict[str, str]): Chunk to update. Should contain: + - id: UUID of the chunk + - metadata: Dictionary of metadata + Returns: + dict: Update results containing processed chunk information + """ + return await self.client._make_request( + "POST", + f"chunks/{str(chunk['id'])}", + json=chunk, + version="v3", + ) + + async def retrieve( + self, + id: str | UUID, + ) -> WrappedChunkResponse: + """ + Get a specific chunk. + + Args: + id (str | UUID): Chunk ID to retrieve + + Returns: + dict: List of chunks and pagination information + """ + + return await self.client._make_request( + "GET", + f"chunks/{id}", + version="v3", + ) + + # FIXME: Is this the most appropriate name for this method? + async def list_by_document( + self, + document_id: str | UUID, + metadata_filter: Optional[dict] = None, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedChunksResponse: + """ + List chunks for a specific document. + + Args: + document_id (str | UUID): Document ID to get chunks for + metadata_filter (Optional[dict]): Filter chunks by metadata + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. 
+ + Returns: + dict: List of chunks and pagination information + """ + params: dict = { + "offset": offset, + "limit": limit, + } + if metadata_filter: + params["metadata_filter"] = json.dumps(metadata_filter) + + return await self.client._make_request( + "GET", + f"documents/{str(document_id)}/chunks", + params=params, + version="v3", + ) + + async def delete( + self, + id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Delete a specific chunk. + + Args: + id (Union[str, UUID]): ID of chunk to delete + """ + return await self.client._make_request( + "DELETE", + f"chunks/{str(id)}", + version="v3", + ) + + async def list( + self, + include_vectors: bool = False, + metadata_filter: Optional[dict] = None, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedChunksResponse: + """ + List chunks with pagination support. + + Args: + include_vectors (bool, optional): Include vector data in response. Defaults to False. + metadata_filter (Optional[dict], optional): Filter by metadata. Defaults to None. + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: Dictionary containing: + - results: List of chunks + - page_info: Pagination information + """ + params: dict = { + "offset": offset, + "limit": limit, + "include_vectors": include_vectors, + } + + if metadata_filter: + params["metadata_filter"] = json.dumps(metadata_filter) + + return await self.client._make_request( + "GET", + "chunks", + params=params, + version="v3", + ) + + async def search( + self, + query: str, + search_settings: Optional[dict | SearchSettings] = None, + ): # -> CombinedSearchResponse: + """ + Conduct a vector and/or KG search. + + Args: + query (str): The query to search for. + search_settings (Optional[dict, SearchSettings]]): Vector search settings. + + Returns: + CombinedSearchResponse: The search response. + """ + if search_settings and not isinstance(search_settings, dict): + search_settings = search_settings.model_dump() + + data = { + "query": query, + "search_settings": search_settings, + } + return await self.client._make_request( + "POST", + "chunks/search", + json=data, + version="v3", + ) diff --git a/py/sdk/v3/collections.py b/py/sdk/v3/collections.py new file mode 100644 index 000000000..ea7303ff4 --- /dev/null +++ b/py/sdk/v3/collections.py @@ -0,0 +1,313 @@ +from typing import Optional +from uuid import UUID + +from shared.api.models.base import ( + WrappedBooleanResponse, + WrappedGenericMessageResponse, +) +from shared.api.models.management.responses import ( + WrappedCollectionResponse, + WrappedCollectionsResponse, + WrappedDocumentResponse, + WrappedUsersResponse, +) + + +class CollectionsSDK: + def __init__(self, client): + self.client = client + + async def create( + self, + name: str, + description: Optional[str] = None, + ) -> WrappedCollectionResponse: + """ + Create a new collection. 
+ + Args: + name (str): Name of the collection + description (Optional[str]): Description of the collection + + Returns: + dict: Created collection information + """ + data = {"name": name, "description": description} + return await self.client._make_request( + "POST", + "collections", + json=data, + version="v3", + ) + + async def list( + self, + ids: Optional[list[str | UUID]] = None, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedCollectionsResponse: + """ + List collections with pagination and filtering options. + + Args: + ids (Optional[list[str | UUID]]): Filter collections by ids + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of collections and pagination information + """ + params: dict = { + "offset": offset, + "limit": limit, + } + if ids: + params["ids"] = ids + + return await self.client._make_request( + "GET", "collections", params=params, version="v3" + ) + + async def retrieve( + self, + id: str | UUID, + ) -> WrappedCollectionResponse: + """ + Get detailed information about a specific collection. + + Args: + id (str | UUID): Collection ID to retrieve + + Returns: + dict: Detailed collection information + """ + return await self.client._make_request( + "GET", f"collections/{str(id)}", version="v3" + ) + + async def update( + self, + id: str | UUID, + name: Optional[str] = None, + description: Optional[str] = None, + ) -> WrappedCollectionResponse: + """ + Update collection information. + + Args: + id (str | UUID): Collection ID to update + name (Optional[str]): Optional new name for the collection + description (Optional[str]): Optional new description for the collection + + Returns: + dict: Updated collection information + """ + data = {} + if name is not None: + data["name"] = name + if description is not None: + data["description"] = description + + return await self.client._make_request( + "POST", + f"collections/{str(id)}", + json=data, + version="v3", + ) + + async def delete( + self, + id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Delete a collection. + + Args: + id (str | UUID): Collection ID to delete + + Returns: + bool: True if deletion was successful + """ + result = await self.client._make_request( + "DELETE", f"collections/{str(id)}", version="v3" + ) + return result.get("results", True) + + async def list_documents( + self, + id: str | UUID, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedDocumentResponse: + """ + List all documents in a collection. + + Args: + id (str | UUID): Collection ID + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of documents and pagination information + """ + params: dict = { + "offset": offset, + "limit": limit, + } + + return await self.client._make_request( + "GET", + f"collections/{str(id)}/documents", + params=params, + version="v3", + ) + + async def add_document( + self, + id: str | UUID, + document_id: str | UUID, + ) -> WrappedGenericMessageResponse: + """ + Add a document to a collection. 
+ + Args: + id (str | UUID): Collection ID + document_id (str | UUID): Document ID to add + + Returns: + dict: Result of the operation + """ + return await self.client._make_request( + "POST", + f"collections/{str(id)}/documents/{str(document_id)}", + version="v3", + ) + + async def remove_document( + self, + id: str | UUID, + document_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Remove a document from a collection. + + Args: + id (str | UUID): Collection ID + document_id (str | UUID): Document ID to remove + + Returns: + bool: True if removal was successful + """ + result = await self.client._make_request( + "DELETE", + f"collections/{str(id)}/documents/{str(document_id)}", + version="v3", + ) + return result.get("results", True) + + async def list_users( + self, + id: str | UUID, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedUsersResponse: + """ + List all users in a collection. + + Args: + id (str, UUID): Collection ID + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of users and pagination information + """ + params: dict = { + "offset": offset, + "limit": limit, + } + + return await self.client._make_request( + "GET", f"collections/{str(id)}/users", params=params, version="v3" + ) + + async def add_user( + self, + id: str | UUID, + user_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Add a user to a collection. + + Args: + id (str | UUID): Collection ID + user_id (str | UUID): User ID to add + + Returns: + dict: Result of the operation + """ + return await self.client._make_request( + "POST", f"collections/{str(id)}/users/{str(user_id)}", version="v3" + ) + + async def remove_user( + self, + id: str | UUID, + user_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Remove a user from a collection. + + Args: + id (str | UUID): Collection ID + user_id (str | UUID): User ID to remove + + Returns: + bool: True if removal was successful + """ + result = await self.client._make_request( + "DELETE", + f"collections/{str(id)}/users/{str(user_id)}", + version="v3", + ) + return result.get("results", True) + + async def extract( + self, + id: str | UUID, + # run_type: Optional[str] = "RUN", + settings: Optional[dict] = None, + run_with_orchestration: Optional[bool] = True, + ) -> dict: + """ + Extract entities and relationships from documents in a collection. + + Args: + id (str | UUID): Collection ID to extract from + run_type (Optional[str]): Whether to return an estimate of the creation cost or to actually extract. + Defaults to "RUN" + settings (Optional[dict]): Settings for the entities and relationships extraction process + run_with_orchestration (Optional[bool]): Whether to run the extraction process with orchestration. 
+ Defaults to True + + Returns: + dict: Result of the extraction process, containing either: + - For estimates: message, task_id, id, and estimate + - For runs: message and task_id + """ + params = { + # "run_type": run_type, + "run_with_orchestration": run_with_orchestration + } + + data = {} + if settings is not None: + data["settings"] = settings + + return await self.client._make_request( + "POST", + f"collections/{str(id)}/extract", + params=params, + json=data if data else None, + version="v3", + ) diff --git a/py/sdk/v3/conversations.py b/py/sdk/v3/conversations.py new file mode 100644 index 000000000..4fa3887a8 --- /dev/null +++ b/py/sdk/v3/conversations.py @@ -0,0 +1,223 @@ +from typing import Any, Optional +from uuid import UUID + +from shared.api.models.base import WrappedBooleanResponse +from shared.api.models.management.responses import ( + WrappedBranchesResponse, + WrappedConversationMessagesResponse, + WrappedConversationResponse, + WrappedConversationsResponse, + WrappedMessageResponse, +) + + +class ConversationsSDK: + def __init__(self, client): + self.client = client + + async def create(self) -> WrappedConversationResponse: + """ + Create a new conversation. + + Returns: + dict: Created conversation information + """ + return await self.client._make_request( + "POST", + "conversations", + version="v3", + ) + + async def list( + self, + ids: Optional[list[str | UUID]] = None, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedConversationsResponse: + """ + List conversations with pagination and sorting options. + + Args: + ids (Optional[list[Union[str, UUID]]]): List of conversation IDs to retrieve + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of conversations and pagination information + """ + params: dict = { + "offset": offset, + "limit": limit, + } + if ids: + params["ids"] = ids + + return await self.client._make_request( + "GET", + "conversations", + params=params, + version="v3", + ) + + async def retrieve( + self, + id: str | UUID, + branch_id: Optional[str] = None, + ) -> WrappedConversationMessagesResponse: + """ + Get detailed information about a specific conversation. + + Args: + id (Union[str, UUID]): The ID of the conversation to retrieve + branch_id (Optional[str]): The ID of the branch to retrieve + + Returns: + dict: Detailed conversation information + """ + params = {} + if branch_id: + params["branch_id"] = branch_id + + return await self.client._make_request( + "GET", + f"conversations/{str(id)}", + params=params, + version="v3", + ) + + async def delete( + self, + id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Delete a conversation. + + Args: + id (Union[str, UUID]): The ID of the conversation to delete + + Returns: + bool: True if deletion was successful + """ + return await self.client._make_request( + "DELETE", + f"conversations/{str(id)}", + version="v3", + ) + + async def add_message( + self, + id: str | UUID, + content: str, + role: str, + parent_id: Optional[str] = None, + metadata: Optional[dict] = None, + ) -> WrappedMessageResponse: + """ + Add a new message to a conversation. 
+ + Args: + id (Union[str, UUID]): The ID of the conversation to add the message to + content (str): The content of the message + role (str): The role of the message (e.g., "user" or "assistant") + parent_id (Optional[str]): The ID of the parent message + metadata (Optional[dict]): Additional metadata to attach to the message + + Returns: + dict: Result of the operation, including the new message ID + """ + data: dict[str, Any] = { + "content": content, + "role": role, + } + if parent_id: + data["parent_id"] = parent_id + if metadata: + data["metadata"] = metadata + + return await self.client._make_request( + "POST", + f"conversations/{str(id)}/messages", + data=data, + version="v3", + ) + + async def update_message( + self, + id: str | UUID, + message_id: str, + content: str, + ) -> dict: + """ + Update an existing message in a conversation. + + Args: + id (Union[str, UUID]): The ID of the conversation containing the message + message_id (str): The ID of the message to update + content (str): The new content of the message + + Returns: + dict: Result of the operation, including the new message ID and branch ID + """ + # data = {"content": content} + return await self.client._make_request( + "PUT", + f"conversations/{str(id)}/messages/{message_id}", + json=content, + version="v3", + ) + + async def list_branches( + self, + id: str | UUID, + ) -> WrappedBranchesResponse: + """ + List all branches in a conversation. + + Args: + id (Union[str, UUID]): The ID of the conversation to list branches for + + Returns: + dict: List of branches in the conversation + """ + return await self.client._make_request( + "GET", + f"conversations/{str(id)}/branches", + version="v3", + ) + + # Commented methods to be added after more testing + # async def get_next_branch( + # self, + # id: Union[str, UUID], + # branch_id: str, + # ) -> dict: + # """ + # Get the next branch in the conversation. + # """ + # return await self.client._make_request( + # "GET", f"conversations/{str(id)}/branches/{branch_id}/next" + # ) + + # async def get_previous_branch( + # self, + # id: Union[str, UUID], + # branch_id: str, + # ) -> dict: + # """ + # Get the previous branch in the conversation. + # """ + # return await self.client._make_request( + # "GET", f"conversations/{str(id)}/branches/{branch_id}/previous" + # ) + + # async def create_branch( + # self, + # id: Union[str, UUID], + # message_id: str, + # ) -> dict: + # """ + # Create a new branch starting from a specific message. + # """ + # return await self.client._make_request( + # "POST", f"conversations/{str(id)}/messages/{message_id}/branch" + # ) diff --git a/py/sdk/v3/documents.py b/py/sdk/v3/documents.py new file mode 100644 index 000000000..a0b4dff6d --- /dev/null +++ b/py/sdk/v3/documents.py @@ -0,0 +1,401 @@ +import json +from io import BytesIO +from typing import Optional +from uuid import UUID + +from shared.api.models.base import WrappedBooleanResponse +from shared.api.models.ingestion.responses import WrappedIngestionResponse +from shared.api.models.management.responses import ( + WrappedChunksResponse, + WrappedCollectionsResponse, + WrappedDocumentResponse, + WrappedDocumentsResponse, +) + + +class DocumentsSDK: + """ + SDK for interacting with documents in the v3 API. 
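+
+    Example (illustrative; assumes the client exposes this SDK as
+    client.documents and that "report.pdf" exists locally):
+        ingest_response = await client.documents.create(file_path="report.pdf")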
+    """
+
+    def __init__(self, client):
+        self.client = client
+
+    async def create(
+        self,
+        file_path: Optional[str] = None,
+        raw_text: Optional[str] = None,
+        chunks: Optional[list[str]] = None,
+        id: Optional[str | UUID] = None,
+        collection_ids: Optional[list[str | UUID]] = None,
+        metadata: Optional[dict] = None,
+        ingestion_config: Optional[dict] = None,
+        run_with_orchestration: Optional[bool] = True,
+    ) -> WrappedIngestionResponse:
+        """
+        Create a new document from a file, raw text, or pre-processed chunks.
+
+        Args:
+            file_path (Optional[str]): The file to upload, if any
+            raw_text (Optional[str]): Optional text content to upload, if no file path is provided
+            chunks (Optional[list[str]]): Optional pre-processed chunks to ingest, if no file path or raw text is provided
+            id (Optional[Union[str, UUID]]): Optional ID to assign to the document
+            collection_ids (Optional[list[Union[str, UUID]]]): Collection IDs to associate with the document. If none are provided, the document will be assigned to the user's default collection.
+            metadata (Optional[dict]): Optional metadata to assign to the document
+            ingestion_config (Optional[dict]): Optional ingestion configuration to use
+            run_with_orchestration (Optional[bool]): Whether to run with orchestration
+        """
+        if not file_path and not raw_text and not chunks:
+            raise ValueError(
+                "Either `file_path`, `raw_text` or `chunks` must be provided"
+            )
+        if (
+            (file_path and raw_text)
+            or (file_path and chunks)
+            or (raw_text and chunks)
+        ):
+            raise ValueError(
+                "Only one of `file_path`, `raw_text` or `chunks` may be provided"
+            )
+
+        data = {}
+        files = None
+
+        if id:
+            data["id"] = str(id)  # json.dumps(str(id))
+        if metadata:
+            data["metadata"] = json.dumps(metadata)
+        if ingestion_config:
+            data["ingestion_config"] = json.dumps(ingestion_config)
+        if collection_ids:
+            collection_ids = [str(collection_id) for collection_id in collection_ids]  # type: ignore
+            data["collection_ids"] = json.dumps(collection_ids)
+        if run_with_orchestration is not None:
+            data["run_with_orchestration"] = str(run_with_orchestration)
+
+        if file_path:
+            # Create a new file instance that will remain open during the request
+            file_instance = open(file_path, "rb")
+            files = [
+                (
+                    "file",
+                    (file_path, file_instance, "application/octet-stream"),
+                )
+            ]
+            try:
+                result = await self.client._make_request(
+                    "POST",
+                    "documents",
+                    data=data,
+                    files=files,
+                    version="v3",
+                )
+            finally:
+                # Ensure we close the file after the request is complete
+                file_instance.close()
+            return result
+        elif raw_text:
+            data["raw_text"] = raw_text  # type: ignore
+            return await self.client._make_request(
+                "POST",
+                "documents",
+                data=data,
+                version="v3",
+            )
+        else:
+            data["chunks"] = json.dumps(chunks)
+            return await self.client._make_request(
+                "POST",
+                "documents",
+                data=data,
+                version="v3",
+            )
+
+    async def retrieve(
+        self,
+        id: str | UUID,
+    ) -> WrappedDocumentResponse:
+        """
+        Get a specific document by ID.
+
+        Args:
+            id (Union[str, UUID]): ID of document to retrieve
+
+        Returns:
+            dict: Document information
+        """
+        return await self.client._make_request(
+            "GET",
+            f"documents/{str(id)}",
+            version="v3",
+        )
+
+    async def download(
+        self,
+        id: str | UUID,
+    ) -> BytesIO:
+        """
+        Download a document's file content.
+
+        Args:
+            id (Union[str, UUID]): ID of document to download
+
+        Returns:
+            BytesIO: File content as a binary stream
+        """
+        return await self.client._make_request(
+            "GET",
+            f"documents/{str(id)}/download",
+            version="v3",
+        )
+
+    async def delete(
+        self,
+        id: str | UUID,
+    ) -> WrappedBooleanResponse:
+        """
+        Delete a specific document.
+ + Args: + id (Union[str, UUID]): ID of document to delete + """ + return await self.client._make_request( + "DELETE", + f"documents/{str(id)}", + version="v3", + ) + + async def list_chunks( + self, + id: str | UUID, + include_vectors: Optional[bool] = False, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedChunksResponse: + """ + Get chunks for a specific document. + + Args: + id (Union[str, UUID]): ID of document to retrieve chunks for + include_vectors (Optional[bool]): Whether to include vector embeddings in the response + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of document chunks and pagination information + """ + params = { + "offset": offset, + "limit": limit, + "include_vectors": include_vectors, + } + return await self.client._make_request( + "GET", + f"documents/{str(id)}/chunks", + params=params, + version="v3", + ) + + async def list_collections( + self, + id: str | UUID, + include_vectors: Optional[bool] = False, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedCollectionsResponse: + """ + List collections for a specific document. + + Args: + id (Union[str, UUID]): ID of document to retrieve collections for + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of document chunks and pagination information + """ + params = { + "offset": offset, + "limit": limit, + } + + return await self.client._make_request( + "GET", + f"documents/{str(id)}/collections", + params=params, + version="v3", + ) + + async def delete_by_filter( + self, + filters: dict, + ) -> WrappedBooleanResponse: + """ + Delete documents based on filters. + + Args: + filters (dict): Filters to apply when selecting documents to delete + """ + filters_json = json.dumps(filters) + return await self.client._make_request( + "DELETE", + "documents/by-filter", + data=filters_json, + # params={"filters": filters_json}, + # data=filters, + version="v3", + ) + + async def extract( + self, + id: str | UUID, + run_type: Optional[str] = None, + settings: Optional[dict] = None, + run_with_orchestration: Optional[bool] = True, + ) -> dict: + """ + Extract entities and relationships from a document. + + Args: + id (Union[str, UUID]): ID of document to extract from + run_type (Optional[str]): Whether to return an estimate or run extraction + settings (Optional[dict]): Settings for extraction process + run_with_orchestration (Optional[bool]): Whether to run with orchestration + + Returns: + dict: Extraction results or cost estimate + """ + data = {} + if run_type: + data["run_type"] = run_type + if settings: + data["settings"] = json.dumps(settings) + if run_with_orchestration is not None: + data["run_with_orchestration"] = str(run_with_orchestration) + + return await self.client._make_request( + "POST", + f"documents/{str(id)}/extract", + params=data, + version="v3", + ) + + async def list_entities( + self, + id: str | UUID, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + include_embeddings: Optional[bool] = False, + ) -> dict: + """ + List entities extracted from a document. 
+ + Args: + id (Union[str, UUID]): ID of document to get entities from + offset (Optional[int]): Number of items to skip + limit (Optional[int]): Max number of items to return + include_embeddings (Optional[bool]): Whether to include embeddings + + Returns: + dict: List of entities and pagination info + """ + params = { + "offset": offset, + "limit": limit, + "include_embeddings": include_embeddings, + } + return await self.client._make_request( + "GET", + f"documents/{str(id)}/entities", + params=params, + version="v3", + ) + + async def list_relationships( + self, + id: str | UUID, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + entity_names: Optional[list[str]] = None, + relationship_types: Optional[list[str]] = None, + ) -> dict: + """ + List relationships extracted from a document. + + Args: + id (Union[str, UUID]): ID of document to get relationships from + offset (Optional[int]): Number of items to skip + limit (Optional[int]): Max number of items to return + entity_names (Optional[list[str]]): Filter by entity names + relationship_types (Optional[list[str]]): Filter by relationship types + + Returns: + dict: List of relationships and pagination info + """ + params = { + "offset": offset, + "limit": limit, + } + if entity_names: + params["entity_names"] = entity_names + if relationship_types: + params["relationship_types"] = relationship_types + + return await self.client._make_request( + "GET", + f"documents/{str(id)}/relationships", + params=params, + version="v3", + ) + + # async def extract( + # self, + # id: str | UUID, + # run_type: Optional[str] = None, + # run_with_orchestration: Optional[bool] = True, + # ): + # data = {} + + # if run_type: + # data["run_type"] = run_type + # if run_with_orchestration is not None: + # data["run_with_orchestration"] = str(run_with_orchestration) + + # return await self.client._make_request( + # "POST", + # f"documents/{str(id)}/extract", + # params=data, + # version="v3", + # ) + + # Be sure to put at bottom of the page... + + async def list( + self, + ids: Optional[list[str | UUID]] = None, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedDocumentsResponse: + """ + List documents with pagination. + + Args: + ids (Optional[list[Union[str, UUID]]]): Optional list of document IDs to filter by + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of documents and pagination information + """ + params = { + "offset": offset, + "limit": limit, + } + if ids: + params["ids"] = [str(doc_id) for doc_id in ids] # type: ignore + + return await self.client._make_request( + "GET", + "documents", + params=params, + version="v3", + ) diff --git a/py/sdk/v3/graphs.py b/py/sdk/v3/graphs.py new file mode 100644 index 000000000..3371cf8a1 --- /dev/null +++ b/py/sdk/v3/graphs.py @@ -0,0 +1,477 @@ +from typing import Any, Optional +from uuid import UUID + +from shared.api.models.base import WrappedBooleanResponse +from shared.api.models.kg.responses import ( + WrappedCommunitiesResponse, + WrappedCommunityResponse, + WrappedEntitiesResponse, + WrappedEntityResponse, + WrappedGraphResponse, + WrappedGraphsResponse, + WrappedRelationshipResponse, + WrappedRelationshipsResponse, +) + +_list = list # Required for type hinting since we have a list method + + +class GraphsSDK: + """ + SDK for interacting with knowledge graphs in the v3 API. 
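+
+    Example (illustrative; assumes the client exposes this SDK as
+    client.graphs):
+        graphs = await client.graphs.list(limit=10)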
+ """ + + def __init__(self, client): + self.client = client + + async def list( + self, + collection_ids: Optional[list[str | UUID]] = None, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedGraphsResponse: + """ + List graphs with pagination and filtering options. + + Args: + ids (Optional[list[str | UUID]]): Filter graphs by ids + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of graphs and pagination information + """ + params: dict = { + "offset": offset, + "limit": limit, + } + if collection_ids: + params["collection_ids"] = collection_ids + + return await self.client._make_request( + "GET", "graphs", params=params, version="v3" + ) + + async def retrieve( + self, + collection_id: str | UUID, + ) -> WrappedGraphResponse: + """ + Get detailed information about a specific graph. + + Args: + collection_id (str | UUID): Graph ID to retrieve + + Returns: + dict: Detailed graph information + """ + return await self.client._make_request( + "GET", f"graphs/{str(collection_id)}", version="v3" + ) + + async def reset( + self, + collection_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Deletes a graph and all its associated data. + + This endpoint permanently removes the specified graph along with all + entities and relationships that belong to only this graph. + + Entities and relationships extracted from documents are not deleted. + + Args: + collection_id (str | UUID): Graph ID to reset + + Returns: + dict: Success message + """ + return await self.client._make_request( + "POST", f"graphs/{str(collection_id)}/reset", version="v3" + ) + + async def update( + self, + collection_id: str | UUID, + name: Optional[str] = None, + description: Optional[str] = None, + ) -> WrappedGraphResponse: + """ + Update graph information. + + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + name (Optional[str]): Optional new name for the graph + description (Optional[str]): Optional new description for the graph + + Returns: + dict: Updated graph information + """ + data = {} + if name is not None: + data["name"] = name + if description is not None: + data["description"] = description + + return await self.client._make_request( + "POST", + f"graphs/{str(collection_id)}", + json=data, + version="v3", + ) + + # TODO: create entity + + async def list_entities( + self, + collection_id: str | UUID, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedEntitiesResponse: + """ + List entities in a graph. + + Args: + collection_id (str | UUID): Graph ID to list entities from + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of entities and pagination information + """ + params: dict = { + "offset": offset, + "limit": limit, + } + + return await self.client._make_request( + "GET", + f"graphs/{str(collection_id)}/entities", + params=params, + version="v3", + ) + + async def get_entity( + self, + collection_id: str | UUID, + entity_id: str | UUID, + ) -> WrappedEntityResponse: + """ + Get entity information in a graph. 
+ + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + entity_id (str | UUID): Entity ID to get from the graph + + Returns: + dict: Entity information + """ + return await self.client._make_request( + "GET", + f"graphs/{str(collection_id)}/entities/{str(entity_id)}", + version="v3", + ) + + # TODO: update entity + + async def remove_entity( + self, + collection_id: str | UUID, + entity_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Remove an entity from a graph. + + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + entity_id (str | UUID): Entity ID to remove from the graph + + Returns: + dict: Success message + """ + return await self.client._make_request( + "DELETE", + f"graphs/{str(collection_id)}/entities/{str(entity_id)}", + version="v3", + ) + + # TODO: create relationship + + async def list_relationships( + self, + collection_id: str | UUID, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedRelationshipsResponse: + """ + List relationships in a graph. + + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of relationships and pagination information + """ + params: dict = { + "offset": offset, + "limit": limit, + } + + return await self.client._make_request( + "GET", + f"graphs/{str(collection_id)}/relationships", + params=params, + version="v3", + ) + + async def get_relationship( + self, + collection_id: str | UUID, + relationship_id: str | UUID, + ) -> WrappedRelationshipResponse: + """ + Get relationship information in a graph. + + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + relationship_id (str | UUID): Relationship ID to get from the graph + + Returns: + dict: Relationship information + """ + return await self.client._make_request( + "GET", + f"graphs/{str(collection_id)}/relationships/{str(relationship_id)}", + version="v3", + ) + + # TODO: update relationship + + async def remove_relationship( + self, + collection_id: str | UUID, + relationship_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Remove a relationship from a graph. + + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + relationship_id (str | UUID): Relationship ID to remove from the graph + + Returns: + dict: Success message + """ + return await self.client._make_request( + "DELETE", + f"graphs/{str(collection_id)}/relationships/{str(relationship_id)}", + version="v3", + ) + + async def build( + self, + collection_id: str | UUID, + settings: Optional[dict] = None, + run_type: str = "estimate", + run_with_orchestration: bool = True, + ) -> WrappedBooleanResponse: + """ + Build a graph. + + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + settings (dict): Settings for the build + run_type (str, optional): Type of build to run. Defaults to "estimate". + run_with_orchestration (bool, optional): Whether to run with orchestration. Defaults to True. 
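+
+        Example (illustrative; assumes the client exposes this SDK as
+        client.graphs, and uses a placeholder collection ID):
+            estimate = await client.graphs.build(
+                collection_id="<collection-uuid>",
+                run_type="estimate",
+            )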
+ + Returns: + dict: Success message + """ + data = { + "run_type": run_type, + "run_with_orchestration": run_with_orchestration, + } + if settings: + data["settings"] = settings + return await self.client._make_request( + "POST", + f"graphs/{str(collection_id)}/communities/build", + json=data, + version="v3", + ) + + # TODO: create community + + async def list_communities( + self, + collection_id: str | UUID, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedCommunitiesResponse: + """ + List communities in a graph. + + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of communities and pagination information + """ + params: dict = { + "offset": offset, + "limit": limit, + } + + return await self.client._make_request( + "GET", + f"graphs/{str(collection_id)}/communities", + params=params, + version="v3", + ) + + async def get_community( + self, + collection_id: str | UUID, + community_id: str | UUID, + ) -> WrappedCommunityResponse: + """ + Get community information in a graph. + + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + community_id (str | UUID): Community ID to get from the graph + + Returns: + dict: Community information + """ + return await self.client._make_request( + "GET", + f"graphs/{str(collection_id)}/communities/{str(community_id)}", + version="v3", + ) + + async def update_community( + self, + collection_id: str | UUID, + community_id: str | UUID, + name: Optional[str] = None, + summary: Optional[str] = None, + findings: Optional[_list[str]] = None, + rating: Optional[int] = None, + rating_explanation: Optional[str] = None, + level: Optional[int] = None, + attributes: Optional[dict] = None, + ) -> WrappedCommunityResponse: + """ + Update community information. + + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + community_id (str | UUID): Community ID to update + name (Optional[str]): Optional new name for the community + summary (Optional[str]): Optional new summary for the community + findings (Optional[list[str]]): Optional new findings for the community + rating (Optional[int]): Optional new rating for the community + rating_explanation (Optional[str]): Optional new rating explanation for the community + level (Optional[int]): Optional new level for the community + attributes (Optional[dict]): Optional new attributes for the community + + Returns: + dict: Updated community information + """ + data: dict[str, Any] = {} + if name is not None: + data["name"] = name + if summary is not None: + data["summary"] = summary + if findings is not None: + data["findings"] = findings + if rating is not None: + data["rating"] = str(rating) + if rating_explanation is not None: + data["rating_explanation"] = rating_explanation + if level is not None: + data["level"] = level + if attributes is not None: + data["attributes"] = attributes + + return await self.client._make_request( + "POST", + f"graphs/{str(collection_id)}/communities/{str(community_id)}", + json=data, + version="v3", + ) + + async def delete_community( + self, + collection_id: str | UUID, + community_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Remove a community from a graph. 
+ + Args: + collection_id (str | UUID): The collection ID corresponding to the graph + community_id (str | UUID): Community ID to remove from the graph + + Returns: + dict: Success message + """ + return await self.client._make_request( + "DELETE", + f"graphs/{str(collection_id)}/communities/{str(community_id)}", + version="v3", + ) + + async def pull( + self, + collection_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Adds documents to a graph by copying their entities and relationships. + + This endpoint: + 1. Copies document entities to the graphs_entities table + 2. Copies document relationships to the graphs_relationships table + 3. Associates the documents with the graph + + When a document is added: + - Its entities and relationships are copied to graph-specific tables + - Existing entities/relationships are updated by merging their properties + - The document ID is recorded in the graph's document_ids array + + Documents added to a graph will contribute their knowledge to: + - Graph analysis and querying + - Community detection + - Knowledge graph enrichment + """ + return await self.client._make_request( + "POST", + f"graphs/{str(collection_id)}/pull", + version="v3", + ) + + async def remove_document( + self, + collection_id: str | UUID, + document_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Removes a document from a graph and removes any associated entities + + This endpoint: + 1. Removes the document ID from the graph's document_ids array + 2. Optionally deletes the document's copied entities and relationships + + The user must have access to both the graph and the document being removed. + """ + return await self.client._make_request( + "DELETE", + f"graphs/{str(collection_id)}/documents/{str(document_id)}", + version="v3", + ) diff --git a/py/sdk/v3/indices.py b/py/sdk/v3/indices.py new file mode 100644 index 000000000..23e68989c --- /dev/null +++ b/py/sdk/v3/indices.py @@ -0,0 +1,136 @@ +import json +from typing import Optional + +from shared.api.models.base import WrappedGenericMessageResponse +from shared.api.models.ingestion.responses import ( + WrappedListVectorIndicesResponse, +) + + +class IndicesSDK: + def __init__(self, client): + self.client = client + + async def create( + self, + config: dict, + run_with_orchestration: Optional[bool] = True, + ) -> WrappedGenericMessageResponse: + """ + Create a new vector similarity search index in the database. + + Args: + config (Union[dict, IndexConfig]): Configuration for the vector index. + run_with_orchestration (Optional[bool]): Whether to run index creation as an orchestrated task. + """ + if not isinstance(config, dict): + config = config.model_dump() + + data = { + "config": config, + "run_with_orchestration": run_with_orchestration, + } + return await self.client._make_request( + "POST", + "indices", + json=data, + version="v3", + ) + + async def list( + self, + filters: Optional[dict] = None, + offset: Optional[int] = 0, + limit: Optional[int] = 10, + ) -> WrappedListVectorIndicesResponse: + """ + List existing vector similarity search indices with pagination support. + + Args: + filters (Optional[dict]): Filter criteria for indices. + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + WrappedListVectorIndicesResponse: The response containing the list of indices. 
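+
+        Example (illustrative; assumes the client exposes this SDK as
+        client.indices):
+            indices = await client.indices.list(offset=0, limit=10)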
+ """ + params: dict = { + "offset": offset, + "limit": limit, + } + if filters: + params["filters"] = json.dumps(filters) + return await self.client._make_request( + "GET", + "indices", + params=params, + version="v3", + ) + + async def retrieve( + self, + index_name: str, + table_name: str = "vectors", + ) -> dict: + """ + Get detailed information about a specific vector index. + + Args: + index_name (str): The name of the index to retrieve. + table_name (str): The name of the table where the index is stored. + + Returns: + WrappedGetIndexResponse: The response containing the index details. + """ + return await self.client._make_request( + "GET", + f"indices/{table_name}/{index_name}", + version="v3", + ) + + # async def update_index( + # self, + # id: Union[str, UUID], + # config: dict, # Union[dict, IndexConfig], + # run_with_orchestration: Optional[bool] = True, + # ) -> dict: + # """ + # Update an existing index's configuration. + + # Args: + # id (Union[str, UUID]): The ID of the index to update. + # config (Union[dict, IndexConfig]): The new configuration for the index. + # run_with_orchestration (Optional[bool]): Whether to run the update as an orchestrated task. + + # Returns: + # WrappedUpdateIndexResponse: The response containing the updated index details. + # """ + # if not isinstance(config, dict): + # config = config.model_dump() + + # data = { + # "config": config, + # "run_with_orchestration": run_with_orchestration, + # } + # return await self.client._make_request("POST", f"indices/{id}", json=data) # type: ignore + + async def delete( + self, + index_name: str, + table_name: str = "vectors", + ) -> WrappedGenericMessageResponse: + """ + Delete an existing vector index. + + Args: + index_name (str): The name of the index to retrieve. + table_name (str): The name of the table where the index is stored. + + Returns: + WrappedGetIndexResponse: The response containing the index details. + """ + return await self.client._make_request( + "DELETE", + f"indices/{table_name}/{index_name}", + version="v3", + ) diff --git a/py/sdk/v3/prompts.py b/py/sdk/v3/prompts.py new file mode 100644 index 000000000..ff7e8d494 --- /dev/null +++ b/py/sdk/v3/prompts.py @@ -0,0 +1,116 @@ +import json +from typing import Optional + +from shared.api.models.base import ( + WrappedBooleanResponse, + WrappedGenericMessageResponse, +) +from shared.api.models.management.responses import ( + WrappedPromptResponse, + WrappedPromptsResponse, +) + + +class PromptsSDK: + def __init__(self, client): + self.client = client + + async def create( + self, name: str, template: str, input_types: dict + ) -> WrappedGenericMessageResponse: + """ + Create a new prompt. + Args: + name (str): The name of the prompt + template (str): The template string for the prompt + input_types (dict): A dictionary mapping input names to their types + Returns: + dict: Created prompt information + """ + data = {"name": name, "template": template, "input_types": input_types} + return await self.client._make_request( + "POST", + "prompts", + json=data, + version="v3", + ) + + async def list(self) -> WrappedPromptsResponse: + """ + List all available prompts. + Returns: + dict: List of all available prompts + """ + return await self.client._make_request( + "GET", + "prompts", + version="v3", + ) + + async def retrieve( + self, + name: str, + inputs: Optional[dict] = None, + prompt_override: Optional[str] = None, + ) -> WrappedPromptResponse: + """ + Get a specific prompt by name, optionally with inputs and override. 
+        Args:
+            name (str): The name of the prompt to retrieve
+            inputs (Optional[dict]): Inputs for the prompt; JSON-encoded by the SDK before sending
+            prompt_override (Optional[str]): An override for the prompt template
+        Returns:
+            dict: The requested prompt with applied inputs and/or override
+        """
+        params = {}
+        if inputs:
+            params["inputs"] = json.dumps(inputs)
+        if prompt_override:
+            params["prompt_override"] = prompt_override
+        return await self.client._make_request(
+            "POST",
+            f"prompts/{name}",
+            params=params,
+            version="v3",
+        )
+
+    async def update(
+        self,
+        name: str,
+        template: Optional[str] = None,
+        input_types: Optional[dict] = None,
+    ) -> WrappedGenericMessageResponse:
+        """
+        Update an existing prompt's template and/or input types.
+        Args:
+            name (str): The name of the prompt to update
+            template (Optional[str]): The updated template string for the prompt
+            input_types (Optional[dict]): The updated dictionary mapping input names to their types
+        Returns:
+            dict: The updated prompt details
+        """
+        data: dict = {}
+        if template:
+            data["template"] = template
+        if input_types:
+            data["input_types"] = json.dumps(input_types)
+        return await self.client._make_request(
+            "PUT",
+            f"prompts/{name}",
+            json=data,
+            version="v3",
+        )
+
+    async def delete(self, name: str) -> WrappedBooleanResponse:
+        """
+        Delete a prompt by name.
+        Args:
+            name (str): The name of the prompt to delete
+        Returns:
+            bool: True if deletion was successful
+        """
+        return await self.client._make_request(
+            "DELETE",
+            f"prompts/{name}",
+            version="v3",
+        )
diff --git a/py/sdk/v3/retrieval.py b/py/sdk/v3/retrieval.py
new file mode 100644
index 000000000..ed3a1438a
--- /dev/null
+++ b/py/sdk/v3/retrieval.py
@@ -0,0 +1,202 @@
+from typing import AsyncGenerator, Optional
+
+from ..models import (
+    CombinedSearchResponse,
+    GenerationConfig,
+    GraphSearchSettings,
+    Message,
+    RAGResponse,
+    SearchSettings,
+)
+
+
+class RetrievalSDK:
+    """
+    SDK for interacting with the retrieval endpoints in the v3 API.
+    """
+
+    def __init__(self, client):
+        self.client = client
+
+    async def search(
+        self,
+        query: str,
+        search_settings: Optional[dict | SearchSettings] = None,
+    ) -> CombinedSearchResponse:
+        """
+        Conduct a vector and/or KG search.
+
+        Args:
+            query (str): The query to search for.
+            search_settings (Optional[dict | SearchSettings]): Vector search settings.
+
+        Returns:
+            CombinedSearchResponse: The search response.
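+
+        Example (illustrative; assumes the client exposes this SDK as
+        client.retrieval):
+            results = await client.retrieval.search(
+                query="What is R2R?",
+                search_settings={"limit": 10},
+            )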
+ """ + if search_settings and not isinstance(search_settings, dict): + search_settings = search_settings.model_dump() + + data = { + "query": query, + "search_settings": search_settings, + } + return await self.client._make_request( + "POST", + "retrieval/search", + json=data, + version="v3", + ) + + async def completion( + self, + messages: list[dict | Message], + generation_config: Optional[dict | GenerationConfig] = None, + ): + cast_messages: list[Message] = [ + Message(**msg) if isinstance(msg, dict) else msg + for msg in messages + ] + + if generation_config and not isinstance(generation_config, dict): + generation_config = generation_config.model_dump() + + data = { + "messages": [msg.model_dump() for msg in cast_messages], + "generation_config": generation_config, + } + + return await self.client._make_request( + "POST", + "retrieval/completion", + json=data, + version="v3", + ) + + async def embedding( + self, + text: str, + ): + data = { + "text": text, + } + + return await self.client._make_request( + "POST", + "retrieval/embedding", + json=data, + version="v3", + ) + + async def rag( + self, + query: str, + rag_generation_config: Optional[dict | GenerationConfig] = None, + search_settings: Optional[dict | SearchSettings] = None, + task_prompt_override: Optional[str] = None, + include_title_if_available: Optional[bool] = False, + ) -> RAGResponse | AsyncGenerator[RAGResponse, None]: + """ + Conducts a Retrieval Augmented Generation (RAG) search with the given query. + + Args: + query (str): The query to search for. + rag_generation_config (Optional[dict | GenerationConfig]): RAG generation configuration. + search_settings (Optional[dict | SearchSettings]): Vector search settings. + task_prompt_override (Optional[str]): Task prompt override. + include_title_if_available (Optional[bool]): Include the title if available. + + Returns: + RAGResponse | AsyncGenerator[RAGResponse, None]: The RAG response + """ + if rag_generation_config and not isinstance( + rag_generation_config, dict + ): + rag_generation_config = rag_generation_config.model_dump() + if search_settings and not isinstance(search_settings, dict): + search_settings = search_settings.model_dump() + + data = { + "query": query, + "rag_generation_config": rag_generation_config, + "search_settings": search_settings, + "task_prompt_override": task_prompt_override, + "include_title_if_available": include_title_if_available, + } + + if rag_generation_config and rag_generation_config.get( # type: ignore + "stream", False + ): + return self.client._make_streaming_request( + "POST", + "retrieval/rag", + json=data, + version="v3", + ) + else: + return await self.client._make_request( + "POST", + "retrieval/rag", + json=data, + version="v3", + ) + + async def agent( + self, + message: Optional[dict | Message] = None, + rag_generation_config: Optional[dict | GenerationConfig] = None, + search_settings: Optional[dict | SearchSettings] = None, + task_prompt_override: Optional[str] = None, + include_title_if_available: Optional[bool] = False, + conversation_id: Optional[str] = None, + branch_id: Optional[str] = None, + ) -> list[Message] | AsyncGenerator[Message, None]: + """ + Performs a single turn in a conversation with a RAG agent. + + Args: + message (Optional[dict | Message]): The message to send to the agent. + search_settings (Optional[dict | SearchSettings]): Vector search settings. + task_prompt_override (Optional[str]): Task prompt override. + include_title_if_available (Optional[bool]): Include the title if available. 
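+
+        Example (illustrative; assumes the client exposes this SDK as
+        client.retrieval):
+            response = await client.retrieval.agent(
+                message={"role": "user", "content": "Summarize the corpus."},
+                rag_generation_config={"stream": False},
+            )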
+ + Returns: + List[Message], AsyncGenerator[Message, None]]: The agent response. + """ + if rag_generation_config and not isinstance( + rag_generation_config, dict + ): + rag_generation_config = rag_generation_config.model_dump() + if search_settings and not isinstance(search_settings, dict): + search_settings = search_settings.model_dump() + + data = { + "rag_generation_config": rag_generation_config or {}, + "search_settings": search_settings, + "task_prompt_override": task_prompt_override, + "include_title_if_available": include_title_if_available, + "conversation_id": conversation_id, + "branch_id": branch_id, + } + + if message: + cast_message: Message = ( + Message(**message) if isinstance(message, dict) else message + ) + data["message"] = cast_message.model_dump() + + if rag_generation_config and rag_generation_config.get( # type: ignore + "stream", False + ): + return self.client._make_streaming_request( + "POST", + "retrieval/agent", + json=data, + version="v3", + ) + else: + return await self.client._make_request( + "POST", + "retrieval/agent", + json=data, + version="v3", + ) diff --git a/py/sdk/v3/system.py b/py/sdk/v3/system.py new file mode 100644 index 000000000..d0b2c982e --- /dev/null +++ b/py/sdk/v3/system.py @@ -0,0 +1,71 @@ +from typing import Optional + +from shared.api.models.base import WrappedGenericMessageResponse +from shared.api.models.management.responses import ( + WrappedLogsResponse, + WrappedServerStatsResponse, + WrappedSettingsResponse, +) + + +class SystemSDK: + def __init__(self, client): + self.client = client + + async def health(self) -> WrappedGenericMessageResponse: + """ + Check the health of the R2R server. + """ + return await self.client._make_request("GET", "health", version="v3") + + async def logs( + self, + run_type_filter: Optional[str] = None, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedLogsResponse: + """ + Get logs from the server. + + Args: + run_type_filter (Optional[str]): The run type to filter by. + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: The logs from the server. + """ + params = { + key: value + for key, value in { + "run_type_filter": run_type_filter, + "offset": offset, + "limit": limit, + }.items() + if value is not None + } + return await self.client._make_request( + "GET", "system/logs", params=params, version="v3" + ) + + async def settings(self) -> WrappedSettingsResponse: + """ + Get the configuration settings for the R2R server. + + Returns: + dict: The server settings. + """ + return await self.client._make_request( + "GET", "system/settings", version="v3" + ) + + async def status(self) -> WrappedServerStatsResponse: + """ + Get statistics about the server, including the start time, uptime, CPU usage, and memory usage. + + Returns: + dict: The server statistics. 
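+
+        Example (illustrative; assumes the client exposes this SDK as
+        client.system):
+            stats = await client.system.status()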
+ """ + return await self.client._make_request( + "GET", "system/status", version="v3" + ) diff --git a/py/sdk/v3/users.py b/py/sdk/v3/users.py new file mode 100644 index 000000000..595fb4dcb --- /dev/null +++ b/py/sdk/v3/users.py @@ -0,0 +1,395 @@ +from typing import Optional +from uuid import UUID + +from shared.api.models.auth.responses import WrappedTokenResponse +from shared.api.models.base import ( + WrappedBooleanResponse, + WrappedGenericMessageResponse, +) +from shared.api.models.management.responses import ( + WrappedCollectionsResponse, + WrappedUserResponse, + WrappedUsersResponse, +) + +from ..models import Token + + +class UsersSDK: + def __init__(self, client): + self.client = client + + async def register(self, email: str, password: str) -> WrappedUserResponse: + """ + Register a new user. + + Args: + email (str): User's email address + password (str): User's password + + Returns: + UserResponse: New user information + """ + data = {"email": email, "password": password} + return await self.client._make_request( + "POST", + "users/register", + json=data, + version="v3", + ) + + async def delete( + self, id: str | UUID, password: str + ) -> WrappedBooleanResponse: + """ + Delete a specific user. + Users can only delete their own account unless they are superusers. + + Args: + id (str | UUID): User ID to delete + password (str): User's password + + Returns: + dict: Deletion result + """ + data = {"password": password} + return await self.client._make_request( + "DELETE", + f"users/{str(id)}", + json=data, + version="v3", + ) + + async def verify_email( + self, email: str, verification_code: str + ) -> WrappedGenericMessageResponse: + """ + Verify a user's email address. + + Args: + email (str): User's email address + verification_code (str): Verification code sent to the user's email + + Returns: + dict: Verification result + """ + data = {"email": email, "verification_code": verification_code} + return await self.client._make_request( + "POST", + "users/verify-email", + json=data, + version="v3", + ) + + async def login(self, email: str, password: str) -> dict[str, Token]: + """ + Log in a user. + + Args: + email (str): User's email address + password (str): User's password + + Returns: + dict[str, Token]: Access and refresh tokens + """ + data = {"username": email, "password": password} + response = await self.client._make_request( + "POST", + "users/login", + data=data, + version="v3", + ) + self.client.access_token = response["results"]["access_token"]["token"] + self.client._refresh_token = response["results"]["refresh_token"][ + "token" + ] + return response + + # FIXME: What is going on here... + async def login_with_token(self, access_token: str) -> dict[str, Token]: + """ + Log in using an existing access token. 
+ + Args: + access_token (str): Existing access token + + Returns: + dict[str, Token]: Token information + """ + self.client.access_token = access_token + try: + await self.client._make_request( + "GET", + "users/me", + version="v3", + ) + return { + "access_token": Token( + token=access_token, token_type="access_token" + ), + } + except Exception: + self.access_token = None + self.client._refresh_token = None + raise ValueError("Invalid token provided") + + async def logout(self) -> WrappedGenericMessageResponse: + """Log out the current user.""" + response = await self.client._make_request( + "POST", + "users/logout", + version="v3", + ) + self.client.access_token = None + self.client._refresh_token = None + return response + + async def refresh_token(self) -> WrappedTokenResponse: + """Refresh the access token using the refresh token.""" + response = await self.client._make_request( + "POST", + "users/refresh-token", + json=self.client._refresh_token, + version="v3", + ) + self.client.access_token = response["results"]["access_token"]["token"] + self.client._refresh_token = response["results"]["refresh_token"][ + "token" + ] + return response + + async def change_password( + self, current_password: str, new_password: str + ) -> WrappedGenericMessageResponse: + """ + Change the user's password. + + Args: + current_password (str): User's current password + new_password (str): User's new password + + Returns: + dict: Change password result + """ + data = { + "current_password": current_password, + "new_password": new_password, + } + return await self.client._make_request( + "POST", + "users/change-password", + json=data, + version="v3", + ) + + async def request_password_reset( + self, email: str + ) -> WrappedGenericMessageResponse: + """ + Request a password reset. + + Args: + email (str): User's email address + + Returns: + dict: Password reset request result + """ + return await self.client._make_request( + "POST", + "users/request-password-reset", + json=email, + version="v3", + ) + + async def reset_password( + self, reset_token: str, new_password: str + ) -> WrappedGenericMessageResponse: + """ + Reset password using a reset token. + + Args: + reset_token (str): Password reset token + new_password (str): New password + + Returns: + dict: Password reset result + """ + data = {"reset_token": reset_token, "new_password": new_password} + return await self.client._make_request( + "POST", + "users/reset-password", + json=data, + version="v3", + ) + + async def list( + self, + ids: Optional[list[str | UUID]] = None, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedUsersResponse: + """ + List users with pagination and filtering options. + + Args: + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of users and pagination information + """ + params = { + "offset": offset, + "limit": limit, + } + if ids: + params["ids"] = [str(user_id) for user_id in ids] # type: ignore + + return await self.client._make_request( + "GET", + "users", + params=params, + version="v3", + ) + + async def retrieve( + self, + id: str | UUID, + ) -> WrappedUserResponse: + """ + Get a specific user. 
+ + Args: + id (str | UUID): User ID to retrieve + + Returns: + dict: Detailed user information + """ + return await self.client._make_request( + "GET", + f"users/{str(id)}", + version="v3", + ) + + async def me( + self, + ) -> WrappedUserResponse: + """ + Get detailed information about the currently authenticated user. + + Returns: + dict: Detailed user information + """ + return await self.client._make_request( + "GET", + "users/me", + version="v3", + ) + + async def update( + self, + id: str | UUID, + email: Optional[str] = None, + is_superuser: Optional[bool] = None, + name: Optional[str] = None, + bio: Optional[str] = None, + profile_picture: Optional[str] = None, + ) -> WrappedUserResponse: + """ + Update user information. + + Args: + id (str | UUID): User ID to update + username (Optional[str]): New username + is_superuser (Optional[bool]): Update superuser status + metadata (Optional[Dict[str, Any]]): Update user metadata + + Returns: + dict: Updated user information + """ + data: dict = {} + if email is not None: + data["email"] = email + if is_superuser is not None: + data["is_superuser"] = is_superuser + if name is not None: + data["name"] = name + if bio is not None: + data["bio"] = bio + if profile_picture is not None: + data["profile_picture"] = profile_picture + + return await self.client._make_request( + "POST", + f"users/{str(id)}", + json=data, # if len(data.keys()) != 1 else list(data.values())[0] + version="v3", + ) + + async def list_collections( + self, + id: str | UUID, + offset: Optional[int] = 0, + limit: Optional[int] = 100, + ) -> WrappedCollectionsResponse: + """ + Get all collections associated with a specific user. + + Args: + id (str | UUID): User ID to get collections for + offset (int, optional): Specifies the number of objects to skip. Defaults to 0. + limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100. + + Returns: + dict: List of collections and pagination information + """ + params = { + "offset": offset, + "limit": limit, + } + + return await self.client._make_request( + "GET", + f"users/{str(id)}/collections", + params=params, + version="v3", + ) + + async def add_to_collection( + self, + id: str | UUID, + collection_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Add a user to a collection. + + Args: + id (str | UUID): User ID to add + collection_id (str | UUID): Collection ID to add user to + """ + return await self.client._make_request( + "POST", + f"users/{str(id)}/collections/{str(collection_id)}", + version="v3", + ) + + async def remove_from_collection( + self, + id: str | UUID, + collection_id: str | UUID, + ) -> WrappedBooleanResponse: + """ + Remove a user from a collection. 
+ + Args: + id (str | UUID): User ID to remove + collection_id (str | UUID): Collection ID to remove user from + + Returns: + bool: True if successful + """ + return await self.client._make_request( + "DELETE", + f"users/{str(id)}/collections/{str(collection_id)}", + version="v3", + ) diff --git a/py/shared/abstractions/__init__.py b/py/shared/abstractions/__init__.py index d17daef19..3fcaf4037 100644 --- a/py/shared/abstractions/__init__.py +++ b/py/shared/abstractions/__init__.py @@ -1,27 +1,23 @@ from .base import AsyncSyncMeta, R2RSerializable, syncable from .document import ( - DataType, Document, - DocumentExtraction, - DocumentInfo, + DocumentChunk, + DocumentResponse, DocumentType, IngestionStatus, KGEnrichmentStatus, KGExtractionStatus, RawChunk, + UnprocessedChunk, ) from .embedding import EmbeddingPurpose, default_embedding_prefixes from .exception import R2RDocumentProcessingError, R2RException -from .graph import ( - Community, - CommunityReport, - Entity, - EntityType, - KGExtraction, - RelationshipType, - Triple, -) +from .graph import Community, Entity, KGExtraction, Relationship from .kg import ( + GraphBuildSettings, + GraphCommunitySettings, + GraphEntitySettings, + GraphRelationshipSettings, KGCreationSettings, KGEnrichmentSettings, KGEntityDeduplicationSettings, @@ -38,19 +34,19 @@ from .prompt import Prompt from .search import ( AggregateSearchResult, + ChunkSearchResult, + ChunkSearchSettings, + GraphSearchResult, + GraphSearchSettings, HybridSearchSettings, KGCommunityResult, KGEntityResult, KGGlobalResult, KGRelationshipResult, - KGSearchMethod, - KGSearchResult, KGSearchResultType, - KGSearchSettings, SearchSettings, - VectorSearchResult, ) -from .user import Token, TokenData, UserStats +from .user import Token, TokenData, User from .vector import ( IndexArgsHNSW, IndexArgsIVFFlat, @@ -72,15 +68,15 @@ # Completion abstractions "MessageType", # Document abstractions - "DataType", "Document", - "DocumentExtraction", - "DocumentInfo", + "DocumentChunk", + "DocumentResponse", "IngestionStatus", "KGExtractionStatus", "KGEnrichmentStatus", "DocumentType", "RawChunk", + "UnprocessedChunk", # Embedding abstractions "EmbeddingPurpose", "default_embedding_prefixes", @@ -89,12 +85,10 @@ "R2RException", # Graph abstractions "Entity", - "EntityType", - "RelationshipType", "Community", - "CommunityReport", + "Community", "KGExtraction", - "Triple", + "Relationship", # LLM abstractions "GenerationConfig", "LLMChatCompletion", @@ -105,15 +99,15 @@ "Prompt", # Search abstractions "AggregateSearchResult", - "KGSearchResult", - "KGSearchMethod", + "GraphSearchResult", "KGSearchResultType", "KGEntityResult", "KGRelationshipResult", "KGCommunityResult", "KGGlobalResult", - "KGSearchSettings", - "VectorSearchResult", + "GraphSearchSettings", + "ChunkSearchSettings", + "ChunkSearchResult", "SearchSettings", "HybridSearchSettings", # KG abstractions @@ -121,10 +115,13 @@ "KGEnrichmentSettings", "KGExtraction", "KGRunType", + "GraphEntitySettings", + "GraphRelationshipSettings", + "GraphCommunitySettings", # User abstractions "Token", "TokenData", - "UserStats", + "User", # Vector abstractions "Vector", "VectorEntry", diff --git a/py/shared/abstractions/document.py b/py/shared/abstractions/document.py index 28dd089cf..225e9af3b 100644 --- a/py/shared/abstractions/document.py +++ b/py/shared/abstractions/document.py @@ -4,7 +4,7 @@ import logging from datetime import datetime from enum import Enum -from typing import ClassVar, Optional, Union +from typing import Optional from uuid import 
UUID, uuid4 from pydantic import Field @@ -13,8 +13,6 @@ logger = logging.getLogger() -DataType = Union[str, bytes] - class DocumentType(str, Enum): """Types of documents that can be stored.""" @@ -96,7 +94,7 @@ class DocumentType(str, Enum): class Document(R2RSerializable): id: UUID = Field(default_factory=uuid4) collection_ids: list[UUID] - user_id: UUID + owner_id: UUID document_type: DocumentType metadata: dict @@ -106,6 +104,7 @@ class Config: json_encoders = { UUID: str, } + populate_by_name = True class IngestionStatus(str, Enum): @@ -129,7 +128,7 @@ def __str__(self): @classmethod def table_name(cls) -> str: - return "document_info" + return "documents" @classmethod def id_column(cls) -> str: @@ -150,11 +149,11 @@ def __str__(self): @classmethod def table_name(cls) -> str: - return "document_info" + return "documents" @classmethod def id_column(cls) -> str: - return "document_id" + return "id" class KGEnrichmentStatus(str, Enum): @@ -175,22 +174,22 @@ def table_name(cls) -> str: @classmethod def id_column(cls) -> str: - return "collection_id" + return "id" -class DocumentInfo(R2RSerializable): +class DocumentResponse(R2RSerializable): """Base class for document information handling.""" id: UUID collection_ids: list[UUID] - user_id: UUID + owner_id: UUID document_type: DocumentType metadata: dict title: Optional[str] = None version: str - size_in_bytes: int + size_in_bytes: Optional[int] ingestion_status: IngestionStatus = IngestionStatus.PENDING - kg_extraction_status: KGExtractionStatus = KGExtractionStatus.PENDING + extraction_status: KGExtractionStatus = KGExtractionStatus.PENDING created_at: Optional[datetime] = None updated_at: Optional[datetime] = None ingestion_attempt_number: Optional[int] = None @@ -207,16 +206,16 @@ def convert_to_db_entry(self): embedding = f"[{','.join(str(x) for x in self.summary_embedding)}]" return { - "document_id": self.id, + "id": self.id, "collection_ids": self.collection_ids, - "user_id": self.user_id, + "owner_id": self.owner_id, "document_type": self.document_type, "metadata": json.dumps(self.metadata), "title": self.title or "N/A", "version": self.version, "size_in_bytes": self.size_in_bytes, "ingestion_status": self.ingestion_status.value, - "kg_extraction_status": self.kg_extraction_status.value, + "extraction_status": self.extraction_status.value, "created_at": self.created_at or now, "updated_at": self.updated_at or now, "ingestion_attempt_number": self.ingestion_attempt_number or 0, @@ -225,14 +224,32 @@ def convert_to_db_entry(self): } -class DocumentExtraction(R2RSerializable): +class UnprocessedChunk(R2RSerializable): + """An extraction from a document.""" + + id: Optional[UUID] = None + document_id: Optional[UUID] = None + collection_ids: list[UUID] = [] + metadata: dict = {} + text: str + + +class UpdateChunk(R2RSerializable): + """An extraction from a document.""" + + id: UUID + metadata: Optional[dict] = None + text: str + + +class DocumentChunk(R2RSerializable): """An extraction from a document.""" id: UUID document_id: UUID collection_ids: list[UUID] - user_id: UUID - data: DataType + owner_id: UUID + data: str | bytes metadata: dict diff --git a/py/shared/abstractions/graph.py b/py/shared/abstractions/graph.py index 8ec6f7b63..4bdc9dd62 100644 --- a/py/shared/abstractions/graph.py +++ b/py/shared/abstractions/graph.py @@ -1,333 +1,139 @@ import json -import logging -import uuid from dataclasses import dataclass -from enum import Enum -from typing import Any, Optional, Union +from datetime import datetime +from typing import 
Any, Optional from uuid import UUID -from pydantic import BaseModel +from pydantic import Field from .base import R2RSerializable -logger = logging.getLogger() - - -@dataclass -class Identified: - """A protocol for an item with an ID.""" - - id: str - """The ID of the item.""" - - short_id: str | None - """Human readable ID used to refer to this community in prompts or texts displayed to users, such as in a report text (optional).""" - - -@dataclass -class Named(Identified): - """A protocol for an item with a name/title.""" - - title: str - """The name/title of the item.""" - - -class EntityType(R2RSerializable): - id: str - name: str - description: str | None = None - - -class RelationshipType(R2RSerializable): - id: str - name: str - description: str | None = None - - -class EntityLevel(str, Enum): - COLLECTION = "collection" - DOCUMENT = "document" - CHUNK = "chunk" - - def __str__(self): - return self.value - class Entity(R2RSerializable): """An entity extracted from a document.""" name: str - id: Optional[int] = None - category: Optional[str] = None description: Optional[str] = None - description_embedding: Optional[Union[list[float], str]] = None - community_numbers: Optional[list[str]] = None - extraction_ids: Optional[list[UUID]] = None - collection_id: Optional[UUID] = None - document_id: Optional[UUID] = None - document_ids: Optional[list[UUID]] = None - # we don't use these yet - # name_embedding: Optional[list[float]] = None - # graph_embedding: Optional[list[float]] = None - # rank: Optional[int] = None - attributes: Optional[Union[dict[str, Any], str]] = None + category: Optional[str] = None + metadata: Optional[dict[str, Any] | str] = None + + id: Optional[UUID] = None + parent_id: Optional[UUID] = None # graph_id | document_id + description_embedding: Optional[list[float] | str] = None + chunk_ids: Optional[list[UUID]] = [] def __str__(self): - return ( - f"{self.category}:{self.subcategory}:{self.value}" - if self.subcategory - else f"{self.category}:{self.value}" - ) + return f"{self.name}:{self.category}" def __init__(self, **kwargs): super().__init__(**kwargs) - if isinstance(self.attributes, str): + if isinstance(self.metadata, str): try: - self.attributes = json.loads(self.attributes) + self.metadata = json.loads(self.metadata) except json.JSONDecodeError: - self.attributes = self.attributes + self.metadata = self.metadata -class Triple(R2RSerializable): +class Relationship(R2RSerializable): """A relationship between two entities. 
This is a generic relationship, and can be used to represent any type of relationship between any two entities.""" - id: Optional[int] = None - + id: Optional[UUID] = None subject: str - """The source entity name.""" - predicate: str - """A description of the relationship (optional).""" - object: str - """The target entity name.""" - - weight: float | None = 1.0 - """The edge weight.""" - description: str | None = None - """A description of the relationship (optional).""" - - predicate_embedding: list[float] | None = None - """The semantic embedding for the relationship description (optional).""" - - extraction_ids: list[UUID] = [] - """List of text unit IDs in which the relationship appears (optional).""" - - document_id: UUID | None = None - """Document ID in which the relationship appears (optional).""" + subject_id: Optional[UUID] = None + object_id: Optional[UUID] = None + weight: float | None = 1.0 + chunk_ids: Optional[list[UUID]] = [] + parent_id: Optional[UUID] = None + description_embedding: Optional[list[float] | str] = None - attributes: dict[str, Any] | str = {} - """Additional attributes associated with the relationship (optional). To be included in the search prompt""" + metadata: Optional[dict[str, Any] | str] = None def __init__(self, **kwargs): super().__init__(**kwargs) - if isinstance(self.attributes, str): + if isinstance(self.metadata, str): try: - self.attributes = json.loads(self.attributes) + self.metadata = json.loads(self.metadata) except json.JSONDecodeError: - self.attributes = self.attributes - - @classmethod - def from_dict( # type: ignore - cls, - d: dict[str, Any], - id_key: str = "id", - short_id_key: str = "short_id", - source_key: str = "subject", - target_key: str = "object", - predicate_key: str = "predicate", - description_key: str = "description", - weight_key: str = "weight", - extraction_ids_key: str = "extraction_ids", - document_id_key: str = "document_id", - attributes_key: str = "attributes", - ) -> "Triple": - """Create a new relationship from the dict data.""" - - return Triple( - id=d[id_key], - short_id=d.get(short_id_key), - subject=d[source_key], - object=d[target_key], - predicate=d.get(predicate_key), - description=d.get(description_key), - weight=d.get(weight_key, 1.0), - extraction_ids=d.get(extraction_ids_key), - document_id=d.get(document_id_key), - attributes=d.get(attributes_key, {}), - ) + self.metadata = self.metadata @dataclass -class Community(BaseModel): - """A protocol for a community in the system.""" - - id: int | None = None - """The ID of the community.""" - - community_number: int | None = None - """The community number.""" - - collection_id: uuid.UUID | None = None - """The ID of the collection this community is associated with.""" - - level: int | None = None - """Community level.""" +class Community(R2RSerializable): name: str = "" - """The name of the community.""" - summary: str = "" - """Summary of the report.""" + level: Optional[int] = None findings: list[str] = [] - """Findings of the report.""" - + id: Optional[int | UUID] = None + community_id: Optional[UUID] = None + collection_id: Optional[UUID] = None rating: float | None = None - """Rating of the report.""" - rating_explanation: str | None = None - """Explanation of the rating.""" - - embedding: list[float] | None = None - """Embedding of summary and findings.""" - + description_embedding: list[float] | None = None attributes: dict[str, Any] | None = None - """A dictionary of additional attributes associated with the community (optional). 
To be included in the search prompt.""" + created_at: datetime = Field( + default_factory=datetime.utcnow, + ) + updated_at: datetime = Field( + default_factory=datetime.utcnow, + ) def __init__(self, **kwargs): - super().__init__(**kwargs) - if isinstance(self.attributes, str): - self.attributes = json.loads(self.attributes) - - @classmethod - def from_dict( - cls, - d: dict[str, Any], - id_key: str = "id", - title_key: str = "title", - short_id_key: str = "short_id", - level_key: str = "level", - entities_key: str = "entity_ids", - relationships_key: str = "relationship_ids", - covariates_key: str = "covariate_ids", - attributes_key: str = "attributes", - ) -> "Community": - """Create a new community from the dict data.""" - return Community( - id=d[id_key], - title=d[title_key], - short_id=d.get(short_id_key), - level=d[level_key], - entity_ids=d.get(entities_key), - relationship_ids=d.get(relationships_key), - covariate_ids=d.get(covariates_key), - attributes=d.get(attributes_key), - ) - - -@dataclass -class CommunityInfo(BaseModel): - """A protocol for a community in the system.""" + if isinstance(kwargs.get("attributes", None), str): + kwargs["attributes"] = json.loads(kwargs["attributes"]) - node: str - cluster: int - parent_cluster: int | None - level: int - is_final_cluster: bool - collection_id: uuid.UUID - triple_ids: Optional[list[int]] = None + if isinstance(kwargs.get("embedding", None), str): + kwargs["embedding"] = json.loads(kwargs["embedding"]) - def __init__(self, **kwargs): super().__init__(**kwargs) @classmethod - def from_dict(cls, d: dict[str, Any]) -> "CommunityInfo": - return CommunityInfo( - node=d["node"], - cluster=d["cluster"], - parent_cluster=d["parent_cluster"], - level=d["level"], - is_final_cluster=d["is_final_cluster"], - triple_ids=d["triple_ids"], - collection_id=d["collection_id"], + def from_dict(cls, data: dict[str, Any] | str) -> "Community": + parsed_data: dict[str, Any] = ( + json.loads(data) if isinstance(data, str) else data ) + if isinstance(parsed_data.get("embedding", None), str): + parsed_data["embedding"] = json.loads(parsed_data["embedding"]) + return cls(**parsed_data) -@dataclass -class CommunityReport(BaseModel): - """Defines an LLM-generated summary report of a community.""" - - community_number: int - """The ID of the community this report is associated with.""" - - level: int - """The level of the community this report is associated with.""" - - collection_id: uuid.UUID - """The ID of the collection this report is associated with.""" - - name: str = "" - """Name of the report.""" - - summary: str = "" - """Summary of the report.""" - - findings: list[str] = [] - """Findings of the report.""" - - rating: float | None = None - """Rating of the report.""" - - rating_explanation: str | None = None - """Explanation of the rating.""" +class KGExtraction(R2RSerializable): + """A protocol for a knowledge graph extraction.""" - embedding: list[float] | None = None - """Embedding of summary and findings.""" + entities: list[Entity] + relationships: list[Relationship] - attributes: dict[str, Any] | None = None - """A dictionary of additional attributes associated with the report (optional).""" - def __init__(self, **kwargs): - super().__init__(**kwargs) - if isinstance(self.attributes, str): - self.attributes = json.loads(self.attributes) +class Graph(R2RSerializable): + id: UUID = Field(default=None) + name: str + description: Optional[str] = None + created_at: datetime = Field( + alias="createdAt", + default_factory=datetime.utcnow, + ) + 
updated_at: datetime = Field( + alias="updatedAt", + default_factory=datetime.utcnow, + ) + status: str = "pending" + + class Config: + populate_by_name = True + from_attributes = True @classmethod - def from_dict( - cls, - d: dict[str, Any], - id_key: str = "id", - title_key: str = "title", - community_number_key: str = "community_number", - short_id_key: str = "short_id", - summary_key: str = "summary", - findings_key: str = "findings", - rank_key: str = "rank", - summary_embedding_key: str = "summary_embedding", - embedding_key: str = "embedding", - attributes_key: str = "attributes", - ) -> "CommunityReport": - """Create a new community report from the dict data.""" - return CommunityReport( - id=d[id_key], - title=d[title_key], - community_number=d[community_number_key], - short_id=d.get(short_id_key), - summary=d[summary_key], - findings=d[findings_key], - rank=d[rank_key], - summary_embedding=d.get(summary_embedding_key), - embedding=d.get(embedding_key), - attributes=d.get(attributes_key), + def from_dict(cls, data: dict[str, Any] | str) -> "Graph": + """Create a Graph instance from a dictionary.""" + # Convert string to dict if needed + parsed_data: dict[str, Any] = ( + json.loads(data) if isinstance(data, str) else data ) + return cls(**parsed_data) - -class KGExtraction(R2RSerializable): - """An extraction from a document that is part of a knowledge graph.""" - - extraction_ids: list[uuid.UUID] - document_id: uuid.UUID - entities: list[Entity] - triples: list[Triple] + def __init__(self, **kwargs): + super().__init__(**kwargs) diff --git a/py/shared/abstractions/kg.py b/py/shared/abstractions/kg.py index 4b9704c08..bb9a06fcf 100644 --- a/py/shared/abstractions/kg.py +++ b/py/shared/abstractions/kg.py @@ -10,12 +10,15 @@ class KGRunType(str, Enum): """Type of KG run.""" ESTIMATE = "estimate" - RUN = "run" + RUN = "run" # deprecated def __str__(self): return self.value +GraphRunType = KGRunType + + class KGEntityDeduplicationType(str, Enum): """Type of KG entity deduplication.""" @@ -30,23 +33,18 @@ def __str__(self): class KGCreationSettings(R2RSerializable): """Settings for knowledge graph creation.""" - kg_triples_extraction_prompt: str = Field( - default="graphrag_triples_extraction_few_shot", + graphrag_relationships_extraction_few_shot: str = Field( + default="graphrag_relationships_extraction_few_shot", description="The prompt to use for knowledge graph extraction.", - alias="graphrag_triples_extraction_few_shot_prompt", # TODO - mark deprecated & remove + alias="graphrag_relationships_extraction_few_shot", # TODO - mark deprecated & remove ) - kg_entity_description_prompt: str = Field( + graph_entity_description_prompt: str = Field( default="graphrag_entity_description", description="The prompt to use for entity description generation.", alias="graphrag_entity_description_prompt", # TODO - mark deprecated & remove ) - force_kg_creation: bool = Field( - default=False, - description="Force run the KG creation step even if the graph is already created.", - ) - entity_types: list[str] = Field( default=[], description="The types of entities to extract.", @@ -57,14 +55,14 @@ class KGCreationSettings(R2RSerializable): description="The types of relations to extract.", ) - extraction_merge_count: int = Field( + chunk_merge_count: int = Field( default=4, description="The number of extractions to merge into a single KG extraction.", ) - max_knowledge_triples: int = Field( + max_knowledge_relationships: int = Field( default=100, - description="The maximum number of knowledge triples 
to extract from each chunk.", + description="The maximum number of knowledge relationships to extract from each chunk.", ) max_description_input_length: int = Field( @@ -81,7 +79,7 @@ class KGCreationSettings(R2RSerializable): class KGEntityDeduplicationSettings(R2RSerializable): """Settings for knowledge graph entity deduplication.""" - kg_entity_deduplication_type: KGEntityDeduplicationType = Field( + graph_entity_deduplication_type: KGEntityDeduplicationType = Field( default=KGEntityDeduplicationType.BY_NAME, description="The type of entity deduplication to use.", ) @@ -91,7 +89,7 @@ class KGEntityDeduplicationSettings(R2RSerializable): description="The maximum length of the description for a node in the graph.", ) - kg_entity_deduplication_prompt: str = Field( + graph_entity_deduplication_prompt: str = Field( default="graphrag_entity_deduplication", description="The prompt to use for knowledge graph entity deduplication.", ) @@ -110,10 +108,10 @@ class KGEnrichmentSettings(R2RSerializable): description="Force run the enrichment step even if graph creation is still in progress for some documents.", ) - community_reports_prompt: str = Field( - default="graphrag_community_reports", + graphrag_communities: str = Field( + default="graphrag_communities", description="The prompt to use for knowledge graph enrichment.", - alias="community_reports_prompt", # TODO - mark deprecated & remove + alias="graphrag_communities", # TODO - mark deprecated & remove ) max_summary_input_length: int = Field( @@ -130,3 +128,82 @@ class KGEnrichmentSettings(R2RSerializable): default_factory=dict, description="Parameters for the Leiden algorithm.", ) + + +class GraphEntitySettings(R2RSerializable): + """Settings for knowledge graph entity creation.""" + + graph_entity_deduplication_type: KGEntityDeduplicationType = Field( + default=KGEntityDeduplicationType.BY_NAME, + description="The type of entity deduplication to use.", + ) + + max_description_input_length: int = Field( + default=65536, + description="The maximum length of the description for a node in the graph.", + ) + + graph_entity_deduplication_prompt: str = Field( + default="graphrag_entity_deduplication", + description="The prompt to use for knowledge graph entity deduplication.", + ) + + generation_config: GenerationConfig = Field( + default_factory=GenerationConfig, + description="Configuration for text generation during graph entity deduplication.", + ) + + +class GraphRelationshipSettings(R2RSerializable): + """Settings for knowledge graph relationship creation.""" + + pass + + +class GraphCommunitySettings(R2RSerializable): + """Settings for knowledge graph community enrichment.""" + + force_kg_enrichment: bool = Field( + default=False, + description="Force run the enrichment step even if graph creation is still in progress for some documents.", + ) + + graphrag_communities: str = Field( + default="graphrag_communities", + description="The prompt to use for knowledge graph enrichment.", + alias="graphrag_communities", # TODO - mark deprecated & remove + ) + + max_summary_input_length: int = Field( + default=65536, + description="The maximum length of the summary for a community.", + ) + + generation_config: GenerationConfig = Field( + default_factory=GenerationConfig, + description="Configuration for text generation during graph enrichment.", + ) + + leiden_params: dict = Field( + default_factory=dict, + description="Parameters for the Leiden algorithm.", + ) + + +class GraphBuildSettings(R2RSerializable): + """Settings for knowledge graph 
build.""" + + entity_settings: GraphEntitySettings = Field( + default=GraphEntitySettings(), + description="Settings for knowledge graph entity creation.", + ) + + relationship_settings: GraphRelationshipSettings = Field( + default=GraphRelationshipSettings(), + description="Settings for knowledge graph relationship creation.", + ) + + community_settings: GraphCommunitySettings = Field( + default=GraphCommunitySettings(), + description="Settings for knowledge graph community enrichment.", + ) diff --git a/py/shared/abstractions/llm.py b/py/shared/abstractions/llm.py index 477801f5c..2f747953e 100644 --- a/py/shared/abstractions/llm.py +++ b/py/shared/abstractions/llm.py @@ -2,7 +2,7 @@ import json from enum import Enum -from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union +from typing import TYPE_CHECKING, Any, ClassVar, Optional from openai.types.chat import ChatCompletion, ChatCompletionChunk from pydantic import BaseModel, Field @@ -51,12 +51,14 @@ class GenerationConfig(R2RSerializable): default_factory=lambda: GenerationConfig._defaults["temperature"] ) top_p: float = Field( - default_factory=lambda: GenerationConfig._defaults["top_p"] + default_factory=lambda: GenerationConfig._defaults["top_p"], + alias="topP", ) max_tokens_to_sample: int = Field( default_factory=lambda: GenerationConfig._defaults[ "max_tokens_to_sample" - ] + ], + alias="maxTokensToSample", ) stream: bool = Field( default_factory=lambda: GenerationConfig._defaults["stream"] @@ -70,12 +72,14 @@ class GenerationConfig(R2RSerializable): add_generation_kwargs: Optional[dict] = Field( default_factory=lambda: GenerationConfig._defaults[ "add_generation_kwargs" - ] + ], + alias="addGenerationKwargs", ) api_base: Optional[str] = Field( - default_factory=lambda: GenerationConfig._defaults["api_base"] + default_factory=lambda: GenerationConfig._defaults["api_base"], + alias="apiBase", ) - response_format: Optional[Union[dict, BaseModel]] = None + response_format: Optional[dict | BaseModel] = None @classmethod def set_default(cls, **kwargs): @@ -112,6 +116,7 @@ def __str__(self): return json.dumps(self.to_dict()) class Config: + populate_by_name = True json_schema_extra = { "model": "openai/gpt-4o", "temperature": 0.1, @@ -137,13 +142,14 @@ def __str__(self): class Message(R2RSerializable): - role: Union[MessageType, str] + role: MessageType | str content: Optional[str] = None name: Optional[str] = None function_call: Optional[dict[str, Any]] = None tool_calls: Optional[list[dict[str, Any]]] = None class Config: + populate_by_name = True json_schema_extra = { "role": "user", "content": "This is a test message.", diff --git a/py/shared/abstractions/prompt.py b/py/shared/abstractions/prompt.py index 7c886c041..85ab53128 100644 --- a/py/shared/abstractions/prompt.py +++ b/py/shared/abstractions/prompt.py @@ -13,7 +13,7 @@ class Prompt(BaseModel): """A prompt that can be formatted with inputs.""" - prompt_id: UUID = Field(default_factory=uuid4) + id: UUID = Field(default_factory=uuid4) name: str template: str input_types: dict[str, str] diff --git a/py/shared/abstractions/search.py b/py/shared/abstractions/search.py index c8170f15d..01a0e223b 100644 --- a/py/shared/abstractions/search.py +++ b/py/shared/abstractions/search.py @@ -1,39 +1,38 @@ """Abstractions for search functionality.""" from enum import Enum -from typing import Any, Optional, Union +from typing import Any, Optional from uuid import UUID from pydantic import Field from .base import R2RSerializable -from .graph import EntityLevel from .llm import 
GenerationConfig from .vector import IndexMeasure -class VectorSearchResult(R2RSerializable): +class ChunkSearchResult(R2RSerializable): """Result of a search operation.""" - extraction_id: UUID + id: UUID document_id: UUID - user_id: Optional[UUID] + owner_id: Optional[UUID] collection_ids: list[UUID] score: float text: str metadata: dict[str, Any] def __str__(self) -> str: - return f"VectorSearchResult(id={self.extraction_id}, document_id={self.document_id}, score={self.score})" + return f"ChunkSearchResult(id={self.id}, document_id={self.document_id}, score={self.score})" def __repr__(self) -> str: return self.__str__() def as_dict(self) -> dict: return { - "extraction_id": self.extraction_id, + "id": self.id, "document_id": self.document_id, - "user_id": self.user_id, + "owner_id": self.owner_id, "collection_ids": self.collection_ids, "score": self.score, "text": self.text, @@ -41,10 +40,11 @@ def as_dict(self) -> dict: } class Config: + populate_by_name = True json_schema_extra = { - "extraction_id": "3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09", + "id": "3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "owner_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], "score": 0.23943702876567796, "text": "Example text from the document", @@ -61,10 +61,6 @@ class KGSearchResultType(str, Enum): COMMUNITY = "community" -class KGSearchMethod(str, Enum): - LOCAL = "local" - - class KGEntityResult(R2RSerializable): name: str description: str @@ -79,9 +75,15 @@ class Config: class KGRelationshipResult(R2RSerializable): - name: str - description: str + # name: str + subject: str + predicate: str + object: str metadata: Optional[dict[str, Any]] = None + score: Optional[float] = None + # name: str + # description: str + # metadata: Optional[dict[str, Any]] = None class Config: json_schema_extra = { @@ -123,21 +125,23 @@ class Config: } -class KGSearchResult(R2RSerializable): - method: KGSearchMethod - content: Union[ - KGEntityResult, KGRelationshipResult, KGCommunityResult, KGGlobalResult - ] +class GraphSearchResult(R2RSerializable): + content: ( + KGEntityResult + | KGRelationshipResult + | KGCommunityResult + | KGGlobalResult + ) result_type: Optional[KGSearchResultType] = None - extraction_ids: Optional[list[UUID]] = None + chunk_ids: Optional[list[UUID]] = None metadata: dict[str, Any] = {} + score: Optional[float] = None class Config: json_schema_extra = { - "method": "local", "content": KGEntityResult.Config.json_schema_extra, "result_type": "entity", - "extraction_ids": ["c68dc72e-fc23-5452-8f49-d7bd46088a96"], + "chunk_ids": ["c68dc72e-fc23-5452-8f49-d7bd46088a96"], "metadata": {"associated_query": "What is the capital of France?"}, } @@ -145,27 +149,40 @@ class Config: class AggregateSearchResult(R2RSerializable): """Result of an aggregate search operation.""" - vector_search_results: Optional[list[VectorSearchResult]] - kg_search_results: Optional[list[KGSearchResult]] = None + chunk_search_results: Optional[list[ChunkSearchResult]] + graph_search_results: Optional[list[GraphSearchResult]] = None def __str__(self) -> str: - return f"AggregateSearchResult(vector_search_results={self.vector_search_results}, kg_search_results={self.kg_search_results})" + return f"AggregateSearchResult(chunk_search_results={self.chunk_search_results}, graph_search_results={self.graph_search_results})" def __repr__(self) -> str: - return 
f"AggregateSearchResult(vector_search_results={self.vector_search_results}, kg_search_results={self.kg_search_results})" + return f"AggregateSearchResult(chunk_search_results={self.chunk_search_results}, graph_search_results={self.graph_search_results})" def as_dict(self) -> dict: return { - "vector_search_results": ( - [result.as_dict() for result in self.vector_search_results] - if self.vector_search_results + "chunk_search_results": ( + [result.as_dict() for result in self.chunk_search_results] + if self.chunk_search_results else [] ), - "kg_search_results": self.kg_search_results or None, + "graph_search_results": self.graph_search_results or None, } +from enum import Enum +from typing import Any, Optional +from uuid import UUID + +from pydantic import Field + +from .base import R2RSerializable +from .llm import GenerationConfig +from .vector import IndexMeasure + + class HybridSearchSettings(R2RSerializable): + """Settings for hybrid search combining full-text and semantic search.""" + full_text_weight: float = Field( default=1.0, description="Weight to apply to full text search" ) @@ -181,22 +198,94 @@ class HybridSearchSettings(R2RSerializable): ) -class SearchSettings(R2RSerializable): - use_vector_search: bool = Field( - default=True, description="Whether to use vector search" +class ChunkSearchSettings(R2RSerializable): + """Settings specific to chunk/vector search.""" + + index_measure: IndexMeasure = Field( + alias="indexMeasure", + default=IndexMeasure.cosine_distance, + description="The distance measure to use for indexing", + ) + probes: int = Field( + default=10, + description="Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.", + ) + ef_search: int = Field( + alias="efSearch", + default=40, + description="Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.", + ) + enabled: bool = Field( + default=True, + description="Whether to enable chunk search", + ) + + +class GraphSearchSettings(R2RSerializable): + """Settings specific to knowledge graph search.""" + + kg_search_level: Optional[str] = Field( + alias="kgSearchLevel", + default=None, + description="KG search level", + ) + generation_config: GenerationConfig = Field( + alias="generationConfig", + default_factory=GenerationConfig, + description="Configuration for text generation during graph search.", + ) + graphrag_map_system: str = Field( + alias="graphragMapSystem", + default="graphrag_map_system", + description="The system prompt for the graphrag map prompt.", + ) + graphrag_reduce_system: str = Field( + alias="graphragReduceSystem", + default="graphrag_reduce_system", + description="The system prompt for the graphrag reduce prompt.", + ) + max_community_description_length: int = Field( + alias="maxCommunityDescriptionLength", + default=65536, ) + max_llm_queries_for_global_search: int = Field( + alias="maxLLMQueriesForGlobalSearch", + default=250, + ) + limits: dict[str, int] = Field( + alias="localSearchLimits", + default={}, + ) + enabled: bool = Field( + default=True, + description="Whether to enable graph search", + ) + + +class SearchSettings(R2RSerializable): + """Main search settings class that combines shared settings with specialized settings for chunks and KG.""" + + # Search type flags use_hybrid_search: bool = Field( default=False, - description="Whether to perform a hybrid search (combining vector and keyword search)", + description="Whether to perform a hybrid search. 
This is equivalent to setting `use_semantic_search=True` and `use_fulltext_search=True`, e.g. combining vector and keyword search.", + alias="useHybridSearch", ) - filters: dict[str, Any] = Field( - default_factory=dict, - description="Alias for search_filters", - deprecated=True, + use_semantic_search: bool = Field( + default=True, + description="Whether to use semantic search", + alias="useSemanticSearch", + ) + use_fulltext_search: bool = Field( + default=False, + description="Whether to use full-text search", + alias="useFulltextSearch", ) - search_filters: dict[str, Any] = Field( + + # Common search parameters + filters: dict[str, Any] = Field( default_factory=dict, - description="""Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. + description="""Filters to apply to the search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. Commonly seen filters include operations include the following: @@ -208,7 +297,7 @@ class SearchSettings(R2RSerializable): `{"$and": {"$document_id": ..., "collection_ids": ...}}`""", ) - search_limit: int = Field( + limit: int = Field( default=10, description="Maximum number of results to return", ge=1, @@ -219,175 +308,85 @@ class SearchSettings(R2RSerializable): ge=0, description="Offset to paginate search results", ) - selected_collection_ids: list[UUID] = Field( - default_factory=list, - description="Collection IDs to search for", - ) - index_measure: IndexMeasure = Field( - default=IndexMeasure.cosine_distance, - description="The distance measure to use for indexing", - ) - include_values: bool = Field( - default=True, - description="Whether to include search score values in the search results", - ) include_metadatas: bool = Field( + alias="includeMetadatas", default=True, description="Whether to include element metadata in the search results", ) - probes: int = Field( - default=10, - description="Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.", - ) - ef_search: int = Field( - default=40, - description="Size of the dynamic candidate list for HNSW index search. 
Higher increases accuracy but decreases speed.", - ) - hybrid_search_settings: HybridSearchSettings = Field( - default=HybridSearchSettings(), - description="Settings for hybrid search", + include_scores: bool = Field( + alias="includeScores", + default=True, + description="Whether to include search score values in the search results", ) + + # Search strategy and settings search_strategy: str = Field( + alias="searchStrategy", default="vanilla", - description="Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')", + description="Search strategy to use (e.g., 'vanilla', 'query_fusion', 'hyde')", + ) + hybrid_settings: HybridSearchSettings = Field( + alias="hybridSearchSettings", + default_factory=HybridSearchSettings, + description="Settings for hybrid search (only used if `use_semantic_search` and `use_fulltext_search` are both true)", + ) + + # Specialized settings + chunk_settings: ChunkSearchSettings = Field( + default_factory=ChunkSearchSettings, + description="Settings specific to chunk/vector search", + ) + graph_settings: GraphSearchSettings = Field( + default_factory=GraphSearchSettings, + description="Settings specific to knowledge graph search", ) class Config: + populate_by_name = True json_encoders = {UUID: str} json_schema_extra = { - "use_vector_search": True, - "use_hybrid_search": True, + "use_semantic_search": True, + "use_fulltext_search": False, + "use_hybrid_search": False, "filters": {"category": "technology"}, "limit": 20, "offset": 0, - "selected_collection_ids": [ - "2acb499e-8428-543b-bd85-0d9098718220", - "3e157b3a-8469-51db-90d9-52e7d896b49b", - ], - "index_measure": "cosine_distance", - "include_metadata": True, - "probes": 10, - "ef_search": 40, - "hybrid_search_settings": { + "search_strategy": "vanilla", + "hybrid_settings": { "full_text_weight": 1.0, "semantic_weight": 5.0, "full_text_limit": 200, "rrf_k": 50, }, - } - - def model_dump(self, *args, **kwargs): - dump = super().model_dump(*args, **kwargs) - dump["selected_collection_ids"] = [ - str(uuid) for uuid in dump["selected_collection_ids"] - ] - return dump - - def __init__(self, **data): - # Either filters or search filters is supported - data["filters"] = { - **data.get("filters", {}), - **data.get("search_filters", {}), - } - data["search_filters"] = { - **data.get("filters", {}), - **data.get("search_filters", {}), - } - super().__init__(**data) - - -class KGSearchSettings(R2RSerializable): - - entities_level: EntityLevel = Field( - default=EntityLevel.DOCUMENT, - description="The level of entities to search for", - ) - - filters: dict[str, Any] = Field( - default_factory=dict, - description="Alias for search_filters", - deprecated=True, - ) - - search_filters: dict[str, Any] = Field( - default_factory=dict, - description="""Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`. 
- - Commonly seen filters include operations include the following: - - `{"document_id": {"$eq": "9fbe403b-..."}}` - - `{"document_id": {"$in": ["9fbe403b-...", "3e157b3a-..."]}}` - - `{"collection_ids": {"$overlap": ["122fdf6a-...", "..."]}}` - - `{"$and": {"$document_id": ..., "collection_ids": ...}}`""", - ) - - selected_collection_ids: list[UUID] = Field( - default_factory=list, - description="Collection IDs to search for", - ) - - graphrag_map_system: str = Field( - default="graphrag_map_system", - description="The system prompt for the graphrag map prompt.", - ) - - graphrag_reduce_system: str = Field( - default="graphrag_reduce_system", - description="The system prompt for the graphrag reduce prompt.", - ) - - use_kg_search: bool = Field( - default=False, description="Whether to use KG search" - ) - kg_search_type: str = Field( - default="local", description="KG search type" - ) # 'global' or 'local' - kg_search_level: Optional[str] = Field( - default=None, description="KG search level" - ) - generation_config: GenerationConfig = Field( - default_factory=GenerationConfig, - description="Configuration for text generation during graph search.", - ) - - # TODO: add these back in - # entity_types: list = [] - # relationships: list = [] - max_community_description_length: int = 65536 - max_llm_queries_for_global_search: int = 250 - local_search_limits: dict[str, int] = { - "__Entity__": 20, - "__Relationship__": 20, - "__Community__": 20, - } - - class Config: - json_encoders = {UUID: str} - json_schema_extra = { - "use_kg_search": True, - "kg_search_type": "local", - "kg_search_level": "0", - "generation_config": GenerationConfig.Config.json_schema_extra, - "max_community_description_length": 65536, - "max_llm_queries_for_global_search": 250, - "local_search_limits": { - "__Entity__": 20, - "__Relationship__": 20, - "__Community__": 20, + "chunk_settings": { + "enabled": True, + "index_measure": "cosine_distance", + "include_metadata": True, + "probes": 10, + "ef_search": 40, + }, + "graph_settings": { + "enabled": True, + "kg_search_level": "0", + "generation_config": GenerationConfig.Config.json_schema_extra, + "max_community_description_length": 65536, + "max_llm_queries_for_global_search": 250, + "limits": { + "entity": 20, + "relationship": 20, + "community": 20, + }, }, } def __init__(self, **data): - # Either filters or search filters is supported + # Handle legacy search_filters field data["filters"] = { **data.get("filters", {}), **data.get("search_filters", {}), } - data["search_filters"] = { - **data.get("filters", {}), - **data.get("search_filters", {}), - } super().__init__(**data) + + def model_dump(self, *args, **kwargs): + dump = super().model_dump(*args, **kwargs) + return dump diff --git a/py/shared/abstractions/user.py b/py/shared/abstractions/user.py index 7cdaa625e..d6ad8e2fc 100644 --- a/py/shared/abstractions/user.py +++ b/py/shared/abstractions/user.py @@ -4,6 +4,8 @@ from pydantic import BaseModel, Field +from shared.abstractions import R2RSerializable + from ..utils import generate_default_user_collection_id @@ -11,10 +13,17 @@ class Collection(BaseModel): id: UUID = Field(default=None) name: str description: Optional[str] = None - created_at: datetime = Field(default_factory=datetime.utcnow) - updated_at: datetime = Field(default_factory=datetime.utcnow) + created_at: datetime = Field( + alias="createdAt", + default_factory=datetime.utcnow, + ) + updated_at: datetime = Field( + alias="updatedAt", + default_factory=datetime.utcnow, + ) class Config: + 
populate_by_name = True from_attributes = True def __init__(self, **data): @@ -34,15 +43,23 @@ class TokenData(BaseModel): exp: Optional[datetime] = None -class UserStats(BaseModel): - user_id: UUID +class User(R2RSerializable): + id: UUID email: str - is_superuser: bool - is_active: bool - is_verified: bool - created_at: datetime - updated_at: datetime - collection_ids: list[UUID] - num_files: int - total_size_in_bytes: int - document_ids: list[UUID] + is_active: bool = True + is_superuser: bool = False + created_at: datetime = datetime.now() + updated_at: datetime = datetime.now() + is_verified: bool = False + collection_ids: list[UUID] = [] + graph_ids: list[UUID] = [] + document_ids: list[UUID] = [] + + # Optional fields (to update or set at creation) + hashed_password: Optional[str] = None + verification_code_expiry: Optional[datetime] = None + name: Optional[str] = None + bio: Optional[str] = None + profile_picture: Optional[str] = None + total_size_in_bytes: Optional[int] = None + num_files: Optional[int] = None diff --git a/py/shared/abstractions/vector.py b/py/shared/abstractions/vector.py index 7b7150f1d..c755c96c2 100644 --- a/py/shared/abstractions/vector.py +++ b/py/shared/abstractions/vector.py @@ -4,7 +4,7 @@ from typing import Any, Optional from uuid import UUID -from pydantic import Field +from pydantic import BaseModel, Field from .base import R2RSerializable @@ -114,12 +114,12 @@ class VectorTableName(str, Enum): This enum represents the different tables where we store vectors. """ - VECTORS = "vectors" - ENTITIES_DOCUMENT = "document_entity" - ENTITIES_COLLECTION = "collection_entity" - # TODO: Add support for triples - # TRIPLES = "chunk_triple" - COMMUNITIES = "community_report" + CHUNKS = "chunks" + ENTITIES_DOCUMENT = "documents_entities" + GRAPHS_ENTITIES = "graphs_entities" + # TODO: Add support for relationships + # TRIPLES = "relationship" + COMMUNITIES = "graphs_communities" def __str__(self) -> str: return self.value @@ -188,9 +188,9 @@ def __repr__(self) -> str: class VectorEntry(R2RSerializable): """A vector entry that can be stored directly in supported vector databases.""" - extraction_id: UUID + id: UUID document_id: UUID - user_id: UUID + owner_id: UUID collection_ids: list[UUID] vector: Vector text: str @@ -200,9 +200,9 @@ def __str__(self) -> str: """Return a string representation of the VectorEntry.""" return ( f"VectorEntry(" - f"extraction_id={self.extraction_id}, " + f"chunk_id={self.id}, " f"document_id={self.document_id}, " - f"user_id={self.user_id}, " + f"user_id={self.owner_id}, " f"collection_ids={self.collection_ids}, " f"vector={self.vector}, " f"text={self.text}, " @@ -229,3 +229,47 @@ def __str__(self) -> str: def __repr__(self) -> str: """Return an unambiguous string representation of the StorageResult.""" return self.__str__() + + +class IndexConfig(BaseModel): + # table_name: Optional[VectorTableName] = Body( + # default=VectorTableName.CHUNKS, + # description=create_vector_descriptions.get("table_name"), + # ), + # index_method: IndexMethod = Body( + # default=IndexMethod.hnsw, + # description=create_vector_descriptions.get("index_method"), + # ), + # index_measure: IndexMeasure = Body( + # default=IndexMeasure.cosine_distance, + # description=create_vector_descriptions.get("index_measure"), + # ), + # index_arguments: Optional[ + # IndexArgsIVFFlat | IndexArgsHNSW + # ] = Body( + # None, + # description=create_vector_descriptions.get("index_arguments"), + # ), + # index_name: Optional[str] = Body( + # None, + # 
description=create_vector_descriptions.get("index_name"), + # ), + # index_column: Optional[str] = Body( + # None, + # description=create_vector_descriptions.get("index_column"), + # ), + # concurrently: bool = Body( + # default=True, + # description=create_vector_descriptions.get("concurrently"), + # ), + # auth_user=Depends(self.service.providers.auth.auth_wrapper), + name: Optional[str] = Field(default=None) + table_name: Optional[str] = Field(default=VectorTableName.CHUNKS) + index_method: Optional[str] = Field(default=IndexMethod.hnsw) + index_measure: Optional[str] = Field(default=IndexMeasure.cosine_distance) + index_arguments: Optional[IndexArgsIVFFlat | IndexArgsHNSW] = Field( + default=None + ) + index_name: Optional[str] = Field(default=None) + index_column: Optional[str] = Field(default=None) + concurrently: Optional[bool] = Field(default=True) diff --git a/py/shared/api/models/__init__.py b/py/shared/api/models/__init__.py index cc982cec8..1171d8318 100644 --- a/py/shared/api/models/__init__.py +++ b/py/shared/api/models/__init__.py @@ -1,10 +1,14 @@ from shared.api.models.auth.responses import ( - GenericMessageResponse, TokenResponse, - UserResponse, - WrappedGenericMessageResponse, WrappedTokenResponse, - WrappedUserResponse, +) +from shared.api.models.base import ( + GenericBooleanResponse, + GenericMessageResponse, + PaginatedResultsWrapper, + ResultsWrapper, + WrappedBooleanResponse, + WrappedGenericMessageResponse, ) from shared.api.models.ingestion.responses import ( IngestionResponse, @@ -13,111 +17,110 @@ WrappedUpdateResponse, ) from shared.api.models.kg.responses import ( - KGCreationResponse, - KGEnrichmentResponse, - KGEntityDeduplicationResponse, - WrappedKGCreationResponse, - WrappedKGEnrichmentResponse, - WrappedKGEntityDeduplicationResponse, + GraphResponse, + WrappedGraphResponse, + WrappedGraphsResponse, ) from shared.api.models.management.responses import ( AnalyticsResponse, - AppSettingsResponse, - CollectionOverviewResponse, + ChunkResponse, CollectionResponse, - ConversationOverviewResponse, - DocumentChunkResponse, - DocumentOverviewResponse, + ConversationResponse, LogResponse, PromptResponse, - ScoreCompletionResponse, ServerStats, - UserOverviewResponse, - WrappedAddUserResponse, + SettingsResponse, WrappedAnalyticsResponse, - WrappedAppSettingsResponse, - WrappedCollectionListResponse, - WrappedCollectionOverviewResponse, + WrappedChunkResponse, + WrappedChunksResponse, WrappedCollectionResponse, + WrappedCollectionsResponse, + WrappedConversationMessagesResponse, WrappedConversationResponse, - WrappedConversationsOverviewResponse, - WrappedDocumentChunkResponse, - WrappedDocumentOverviewResponse, - WrappedGetPromptsResponse, - WrappedLogResponse, - WrappedPromptMessageResponse, + WrappedConversationsResponse, + WrappedDocumentResponse, + WrappedDocumentsResponse, + WrappedLogsResponse, + WrappedPromptResponse, + WrappedPromptsResponse, WrappedServerStatsResponse, - WrappedUserCollectionResponse, - WrappedUserOverviewResponse, - WrappedUsersInCollectionResponse, + WrappedSettingsResponse, + WrappedUserResponse, + WrappedUsersResponse, ) from shared.api.models.retrieval.responses import ( - RAGAgentResponse, + AgentResponse, + CombinedSearchResponse, RAGResponse, - SearchResponse, + WrappedAgentResponse, WrappedDocumentSearchResponse, - WrappedRAGAgentResponse, WrappedRAGResponse, WrappedSearchResponse, + WrappedVectorSearchResponse, ) __all__ = [ # Auth Responses "GenericMessageResponse", "TokenResponse", - "UserResponse", "WrappedTokenResponse", 
- "WrappedUserResponse", "WrappedGenericMessageResponse", # Ingestion Responses "IngestionResponse", "WrappedIngestionResponse", "WrappedUpdateResponse", "WrappedMetadataUpdateResponse", - # Restructure Responses - "KGCreationResponse", - "WrappedKGCreationResponse", - "KGEnrichmentResponse", - "WrappedKGEnrichmentResponse", + # TODO: Need to review anything above this + "GraphResponse", + "WrappedGraphResponse", + "WrappedGraphsResponse", # Management Responses "PromptResponse", "ServerStats", "LogResponse", "AnalyticsResponse", - "AppSettingsResponse", - "ScoreCompletionResponse", - "UserOverviewResponse", - "DocumentOverviewResponse", - "DocumentChunkResponse", + "SettingsResponse", + "ChunkResponse", "CollectionResponse", - "CollectionOverviewResponse", - "ConversationOverviewResponse", - "WrappedPromptMessageResponse", + "ConversationResponse", "WrappedServerStatsResponse", - "WrappedLogResponse", + "WrappedLogsResponse", "WrappedAnalyticsResponse", - "WrappedAppSettingsResponse", - "WrappedUserOverviewResponse", - "WrappedConversationResponse", - "WrappedDocumentChunkResponse", - "WrappedDocumentOverviewResponse", - "WrappedDocumentChunkResponse", + "WrappedSettingsResponse", + # Document Responses + "WrappedDocumentResponse", + "WrappedDocumentsResponse", + # Collection Responses "WrappedCollectionResponse", - "WrappedDocumentChunkResponse", - "WrappedCollectionListResponse", - "WrappedAddUserResponse", - "WrappedUsersInCollectionResponse", - "WrappedGetPromptsResponse", - "WrappedUserCollectionResponse", - "WrappedDocumentChunkResponse", - "WrappedCollectionOverviewResponse", - "WrappedConversationsOverviewResponse", + "WrappedCollectionsResponse", + # Prompt Responses + "WrappedPromptResponse", + "WrappedPromptsResponse", + # Chunk Responses + "WrappedChunkResponse", + "WrappedChunksResponse", + # Conversation Responses + "WrappedConversationMessagesResponse", + "WrappedConversationResponse", + "WrappedConversationsResponse", + # User Responses + "WrappedUserResponse", + "WrappedUsersResponse", + # Base Responses + "PaginatedResultsWrapper", + "ResultsWrapper", + "GenericBooleanResponse", + "GenericMessageResponse", + "WrappedBooleanResponse", + "WrappedGenericMessageResponse", + # TODO: Clean up the following responses # Retrieval Responses - "SearchResponse", + "CombinedSearchResponse", "RAGResponse", - "RAGAgentResponse", + "WrappedRAGResponse", + "AgentResponse", "WrappedSearchResponse", "WrappedDocumentSearchResponse", - "WrappedRAGResponse", - "WrappedRAGAgentResponse", + "WrappedVectorSearchResponse", + "WrappedAgentResponse", ] diff --git a/py/shared/api/models/auth/responses.py b/py/shared/api/models/auth/responses.py index 9e868272d..ccb44687f 100644 --- a/py/shared/api/models/auth/responses.py +++ b/py/shared/api/models/auth/responses.py @@ -1,10 +1,6 @@ -from datetime import datetime -from typing import Optional -from uuid import UUID - from pydantic import BaseModel -from shared.abstractions import R2RSerializable, Token +from shared.abstractions import Token from shared.api.models.base import ResultsWrapper @@ -13,29 +9,5 @@ class TokenResponse(BaseModel): refresh_token: Token -class UserResponse(R2RSerializable): - id: UUID - email: str - is_active: bool = True - is_superuser: bool = False - created_at: datetime = datetime.now() - updated_at: datetime = datetime.now() - is_verified: bool = False - collection_ids: list[UUID] = [] - - # Optional fields (to update or set at creation) - hashed_password: Optional[str] = None - verification_code_expiry: Optional[datetime] 
= None - name: Optional[str] = None - bio: Optional[str] = None - profile_picture: Optional[str] = None - - -class GenericMessageResponse(BaseModel): - message: str - - # Create wrapped versions of each response WrappedTokenResponse = ResultsWrapper[TokenResponse] -WrappedUserResponse = ResultsWrapper[UserResponse] -WrappedGenericMessageResponse = ResultsWrapper[GenericMessageResponse] diff --git a/py/shared/api/models/base.py b/py/shared/api/models/base.py index 092728bc3..afb480a93 100644 --- a/py/shared/api/models/base.py +++ b/py/shared/api/models/base.py @@ -12,3 +12,15 @@ class ResultsWrapper(BaseModel, Generic[T]): class PaginatedResultsWrapper(BaseModel, Generic[T]): results: T total_entries: int + + +class GenericBooleanResponse(BaseModel): + success: bool + + +class GenericMessageResponse(BaseModel): + message: str + + +WrappedBooleanResponse = ResultsWrapper[GenericBooleanResponse] +WrappedGenericMessageResponse = ResultsWrapper[GenericMessageResponse] diff --git a/py/shared/api/models/ingestion/responses.py b/py/shared/api/models/ingestion/responses.py index 1abfd4ef8..b6cb61874 100644 --- a/py/shared/api/models/ingestion/responses.py +++ b/py/shared/api/models/ingestion/responses.py @@ -3,7 +3,7 @@ from pydantic import BaseModel, Field -from shared.api.models.base import ResultsWrapper +from shared.api.models.base import PaginatedResultsWrapper, ResultsWrapper T = TypeVar("T") @@ -56,29 +56,15 @@ class Config: } -class CreateVectorIndexResponse(BaseModel): - message: str = Field( - ..., - description="Vector index creation queued successfully.", - ) - - +# TODO: This can probably be cleaner class ListVectorIndicesResponse(BaseModel): indices: list[dict[str, Any]] -class DeleteVectorIndexResponse(BaseModel): - message: str - - -class SelectVectorIndexResponse(BaseModel): - status: str - - -WrappedIngestionResponse = ResultsWrapper[list[IngestionResponse]] +WrappedIngestionResponse = ResultsWrapper[IngestionResponse] WrappedMetadataUpdateResponse = ResultsWrapper[IngestionResponse] WrappedUpdateResponse = ResultsWrapper[UpdateResponse] -WrappedCreateVectorIndexResponse = ResultsWrapper[CreateVectorIndexResponse] -WrappedListVectorIndicesResponse = ResultsWrapper[ListVectorIndicesResponse] -WrappedDeleteVectorIndexResponse = ResultsWrapper[DeleteVectorIndexResponse] -WrappedSelectVectorIndexResponse = ResultsWrapper[SelectVectorIndexResponse] + +WrappedListVectorIndicesResponse = PaginatedResultsWrapper[ + ListVectorIndicesResponse +] diff --git a/py/shared/api/models/kg/responses.py b/py/shared/api/models/kg/responses.py index 3438b26e1..bbb946553 100644 --- a/py/shared/api/models/kg/responses.py +++ b/py/shared/api/models/kg/responses.py @@ -1,315 +1,31 @@ -from typing import Optional, Union +from datetime import datetime +from typing import Optional from uuid import UUID -from pydantic import BaseModel, Field +from pydantic import BaseModel -from shared.abstractions.base import R2RSerializable -from shared.abstractions.graph import CommunityReport, Entity, Triple -from shared.api.models.base import ResultsWrapper +from shared.abstractions.graph import Community, Entity, Relationship +from shared.api.models.base import PaginatedResultsWrapper, ResultsWrapper +WrappedEntityResponse = ResultsWrapper[Entity] +WrappedEntitiesResponse = PaginatedResultsWrapper[list[Entity]] +WrappedRelationshipResponse = ResultsWrapper[Relationship] +WrappedRelationshipsResponse = PaginatedResultsWrapper[list[Relationship]] +WrappedCommunityResponse = ResultsWrapper[Community] 
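# A minimal sketch, outside the patch hunks above and below, of how the new wrapped
# graph-response aliases might be consumed. It assumes pydantic v2 generic models and
# only uses Entity and PaginatedResultsWrapper as introduced in this patch; the payload
# literal and the `entities_wrapper` name are hypothetical, for illustration only.
#
#     from shared.abstractions.graph import Entity
#     from shared.api.models.base import PaginatedResultsWrapper
#
#     entities_wrapper = PaginatedResultsWrapper[list[Entity]]
#     payload = {
#         "results": [
#             {"name": "Paris", "category": "Location", "description": "Capital of France"}
#         ],
#         "total_entries": 1,
#     }
#     wrapped = entities_wrapper(**payload)
#     assert wrapped.total_entries == 1
#     assert wrapped.results[0].name == "Paris"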
+WrappedCommunitiesResponse = PaginatedResultsWrapper[list[Community]] -class KGCreationResponse(BaseModel): - message: str = Field( - ..., - description="A message describing the result of the KG creation request.", - ) - task_id: UUID = Field( - ..., - description="The task ID of the KG creation request.", - ) +class GraphResponse(BaseModel): + id: UUID + collection_id: UUID + name: str + description: Optional[str] + status: str + created_at: datetime + updated_at: datetime + document_ids: list[UUID] -class Config: - json_schema_extra = { - "example": { - "message": "Graph creation queued successfully.", - "task_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", - } - } - -class KGEnrichmentResponse(BaseModel): - message: str = Field( - ..., - description="A message describing the result of the KG enrichment request.", - ) - task_id: UUID = Field( - ..., - description="The task ID of the KG enrichment request.", - ) - - class Config: - json_schema_extra = { - "example": { - "message": "Graph enrichment queued successfuly.", - "task_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", - } - } - - -class KGCreationEstimationResponse(R2RSerializable): - """Response for knowledge graph creation estimation.""" - - message: str = Field( - default="", - description="The message to display to the user.", - ) - - document_count: Optional[int] = Field( - default=None, - description="The number of documents in the collection.", - ) - - number_of_jobs_created: Optional[int] = Field( - default=None, - description="The number of jobs created for the graph creation process.", - ) - - total_chunks: Optional[int] = Field( - default=None, - description="The estimated total number of chunks.", - ) - - estimated_entities: Optional[str] = Field( - default=None, - description="The estimated number of entities in the graph.", - ) - - estimated_triples: Optional[str] = Field( - default=None, - description="The estimated number of triples in the graph.", - ) - - estimated_llm_calls: Optional[str] = Field( - default=None, - description="The estimated number of LLM calls in millions.", - ) - - estimated_total_in_out_tokens_in_millions: Optional[str] = Field( - default=None, - description="The estimated total number of input and output tokens in millions.", - ) - - estimated_total_time_in_minutes: Optional[str] = Field( - default=None, - description="The estimated total time to run the graph creation process in minutes.", - ) - - estimated_cost_in_usd: Optional[str] = Field( - default=None, - description="The estimated cost to run the graph creation process in USD.", - ) - - -class KGDeduplicationEstimationResponse(R2RSerializable): - """Response for knowledge graph deduplication estimation.""" - - message: str = Field( - default="", - description="The message to display to the user.", - ) - - num_entities: Optional[int] = Field( - default=None, - description="The number of entities in the collection.", - ) - - estimated_llm_calls: Optional[str] = Field( - default=None, - description="The estimated number of LLM calls.", - ) - - estimated_total_in_out_tokens_in_millions: Optional[str] = Field( - default=None, - description="The estimated total number of input and output tokens in millions.", - ) - - estimated_cost_in_usd: Optional[str] = Field( - default=None, - description="The estimated cost in USD.", - ) - - estimated_total_time_in_minutes: Optional[str] = Field( - default=None, - description="The estimated time in minutes.", - ) - - -class KGEnrichmentEstimationResponse(R2RSerializable): - """Response for knowledge graph 
enrichment estimation.""" - - message: str = Field( - default="", - description="The message to display to the user.", - ) - - total_entities: Optional[int] = Field( - default=None, - description="The total number of entities in the graph.", - ) - - total_triples: Optional[int] = Field( - default=None, - description="The total number of triples in the graph.", - ) - - estimated_llm_calls: Optional[str] = Field( - default=None, - description="The estimated number of LLM calls.", - ) - - estimated_total_in_out_tokens_in_millions: Optional[str] = Field( - default=None, - description="The estimated total number of input and output tokens in millions.", - ) - - estimated_cost_in_usd: Optional[str] = Field( - default=None, - description="The estimated cost to run the graph enrichment process.", - ) - - estimated_total_time_in_minutes: Optional[str] = Field( - default=None, - description="The estimated total time to run the graph enrichment process.", - ) - - -class KGEntitiesResponse(R2RSerializable): - """Response for knowledge graph entities.""" - - entities: list[Entity] = Field( - ..., - description="The list of entities in the graph.", - ) - - total_entries: int = Field( - ..., - description="The total number of entities in the graph for the collection or document.", - ) - - class Config: - json_schema_extra = { - "example": { - "entities": [ - { - "id": "1", - "name": "Entity 1", - "description": "Description 1", - }, - { - "id": "2", - "name": "Entity 2", - "description": "Description 2", - }, - ], - "total_entries": 2, - } - } - - -class KGTriplesResponse(R2RSerializable): - """Response for knowledge graph triples.""" - - triples: list[Triple] = Field( - ..., - description="The list of triples in the graph.", - ) - - total_entries: int = Field( - ..., - description="The total number of triples in the graph for the collection or document.", - ) - - class Config: - json_schema_extra = { - "example": { - "triples": [ - { - "subject": "Paris", - "predicate": "is capital of", - "object": "France", - "description": "Paris is the capital of France", - } - ], - "total_entries": 2, - } - } - - -class KGCommunitiesResponse(R2RSerializable): - """Response for knowledge graph communities.""" - - communities: list[CommunityReport] = Field( - ..., - description="The list of communities in the graph for the collection.", - ) - - total_entries: int = Field( - ..., - description="The total number of communities in the graph.", - ) - - class Config: - json_schema_extra = { - "example": { - "total_count": 1, - "communities": [ - { - "id": "1", - "community_number": 1, - "collection_ids": [ - "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09" - ], - "level": 0, - "name": "community name", - "summary": "community summary", - "findings": ["finding1", "finding2"], - "rating": "8", - "rating_explanation": "rating explanation", - } - ], - } - } - - -class KGEntityDeduplicationResponse(BaseModel): - """Response for knowledge graph entity deduplication.""" - - message: str = Field( - ..., - description="The message to display to the user.", - ) - - class Config: - json_schema_extra = { - "example": { - "message": "Entity deduplication queued successfully.", - } - } - - -class KGTunePromptResponse(R2RSerializable): - """Response containing just the tuned prompt string.""" - - tuned_prompt: str = Field( - ..., - description="The updated prompt.", - ) - - class Config: - json_schema_extra = {"example": {"tuned_prompt": "The updated prompt"}} - - -WrappedKGCreationResponse = ResultsWrapper[ - Union[KGCreationResponse, 
KGCreationEstimationResponse] -] -WrappedKGEnrichmentResponse = ResultsWrapper[ - Union[KGEnrichmentResponse, KGEnrichmentEstimationResponse] -] -WrappedKGEntitiesResponse = ResultsWrapper[KGEntitiesResponse] -WrappedKGTriplesResponse = ResultsWrapper[KGTriplesResponse] -WrappedKGTunePromptResponse = ResultsWrapper[KGTunePromptResponse] -WrappedKGCommunitiesResponse = ResultsWrapper[KGCommunitiesResponse] -WrappedKGEntityDeduplicationResponse = ResultsWrapper[ - Union[KGEntityDeduplicationResponse, KGDeduplicationEstimationResponse] -] +# Graph Responses +WrappedGraphResponse = ResultsWrapper[GraphResponse] +WrappedGraphsResponse = PaginatedResultsWrapper[list[GraphResponse]] diff --git a/py/shared/api/models/management/responses.py b/py/shared/api/models/management/responses.py index 9ece25989..a17cf4eea 100644 --- a/py/shared/api/models/management/responses.py +++ b/py/shared/api/models/management/responses.py @@ -1,20 +1,17 @@ from datetime import datetime -from typing import Any, Optional, Tuple +from typing import Any, Optional from uuid import UUID from pydantic import BaseModel +from shared.abstractions.document import DocumentResponse +from shared.abstractions.llm import Message +from shared.abstractions.user import User from shared.api.models.base import PaginatedResultsWrapper, ResultsWrapper -from ....abstractions.llm import Message - - -class UpdatePromptResponse(BaseModel): - message: str - class PromptResponse(BaseModel): - prompt_id: UUID + id: UUID name: str template: str created_at: datetime @@ -22,10 +19,6 @@ class PromptResponse(BaseModel): input_types: dict[str, str] -class AllPromptsResponse(BaseModel): - prompts: dict[str, PromptResponse] - - class LogEntry(BaseModel): key: str value: Any @@ -52,61 +45,17 @@ class AnalyticsResponse(BaseModel): filtered_logs: dict[str, Any] -class AppSettingsResponse(BaseModel): +class SettingsResponse(BaseModel): config: dict[str, Any] prompts: dict[str, Any] r2r_project_name: str # r2r_version: str -class ScoreCompletionResponse(BaseModel): - message: str - - -class UserOverviewResponse(BaseModel): - user_id: UUID - num_files: int - total_size_in_bytes: int - document_ids: list[UUID] - - -class UserResponse(BaseModel): - id: UUID - email: str - is_active: bool = True - is_superuser: bool = False - created_at: datetime = datetime.now() - updated_at: datetime = datetime.now() - is_verified: bool = False - collection_ids: list[UUID] = [] - - # Optional fields (to update or set at creation) - hashed_password: Optional[str] = None - verification_code_expiry: Optional[datetime] = None - name: Optional[str] = None - bio: Optional[str] = None - profile_picture: Optional[str] = None - - -class DocumentOverviewResponse(BaseModel): +class ChunkResponse(BaseModel): id: UUID - title: str - user_id: UUID - document_type: str - created_at: datetime - updated_at: datetime - ingestion_status: str - kg_extraction_status: str - version: str - collection_ids: list[UUID] - metadata: dict[str, Any] - summary: Optional[str] = None - - -class DocumentChunkResponse(BaseModel): - extraction_id: UUID document_id: UUID - user_id: UUID + owner_id: UUID collection_ids: list[UUID] text: str metadata: dict[str, Any] @@ -114,26 +63,20 @@ class DocumentChunkResponse(BaseModel): class CollectionResponse(BaseModel): - collection_id: UUID - name: str - description: Optional[str] - created_at: datetime - updated_at: datetime - - -class CollectionOverviewResponse(BaseModel): - collection_id: UUID + id: UUID + owner_id: Optional[UUID] name: str description: Optional[str] + 
graph_cluster_status: str + graph_sync_status: str created_at: datetime updated_at: datetime user_count: int document_count: int - kg_enrichment_status: str -class ConversationOverviewResponse(BaseModel): - conversation_id: UUID +class ConversationResponse(BaseModel): + id: UUID created_at: datetime user_id: Optional[UUID] = None name: Optional[str] = None @@ -151,40 +94,59 @@ class ResetDataResult(BaseModel): message: Optional[str] = None -class AddUserResponse(BaseModel): - result: bool +class MessageResponse(BaseModel): + id: UUID + message: Message + metadata: dict[str, Any] = {} -# Create wrapped versions of each response -WrappedPromptMessageResponse = ResultsWrapper[UpdatePromptResponse] -WrappedGetPromptsResponse = ResultsWrapper[AllPromptsResponse] -WrappedServerStatsResponse = ResultsWrapper[ServerStats] -WrappedLogResponse = ResultsWrapper[list[LogResponse]] -WrappedAnalyticsResponse = ResultsWrapper[AnalyticsResponse] -WrappedAppSettingsResponse = ResultsWrapper[AppSettingsResponse] -WrappedUserOverviewResponse = PaginatedResultsWrapper[ - list[UserOverviewResponse] -] -WrappedConversationResponse = ResultsWrapper[list[Tuple[str, Message, dict]]] -WrappedDocumentOverviewResponse = PaginatedResultsWrapper[ - list[DocumentOverviewResponse] -] +class BranchResponse(BaseModel): + branch_id: UUID + branch_point_id: Optional[UUID] + content: Optional[str] + created_at: datetime + user_id: Optional[UUID] = None + name: Optional[str] = None + + +# Chunk Responses +WrappedChunkResponse = ResultsWrapper[ChunkResponse] +WrappedChunksResponse = PaginatedResultsWrapper[list[ChunkResponse]] + +# Collection Responses WrappedCollectionResponse = ResultsWrapper[CollectionResponse] -WrappedCollectionListResponse = ResultsWrapper[list[CollectionResponse]] -WrappedCollectionOverviewResponse = ResultsWrapper[ - list[CollectionOverviewResponse] -] -WrappedAddUserResponse = ResultsWrapper[None] -WrappedUsersInCollectionResponse = PaginatedResultsWrapper[list[UserResponse]] -WrappedUserCollectionResponse = PaginatedResultsWrapper[ - list[CollectionResponse] -] -WrappedDocumentChunkResponse = PaginatedResultsWrapper[ - list[DocumentChunkResponse] +WrappedCollectionsResponse = PaginatedResultsWrapper[list[CollectionResponse]] + + +# Conversation Responses +WrappedConversationMessagesResponse = ResultsWrapper[list[MessageResponse]] +WrappedConversationResponse = ResultsWrapper[ConversationResponse] +WrappedConversationsResponse = PaginatedResultsWrapper[ + list[ConversationResponse] ] -WrappedDeleteResponse = ResultsWrapper[None] +WrappedMessageResponse = ResultsWrapper[MessageResponse] +WrappedMessagesResponse = PaginatedResultsWrapper[list[MessageResponse]] +WrappedBranchResponse = ResultsWrapper[BranchResponse] +WrappedBranchesResponse = PaginatedResultsWrapper[list[BranchResponse]] + +# Document Responses +WrappedDocumentResponse = ResultsWrapper[DocumentResponse] +WrappedDocumentsResponse = PaginatedResultsWrapper[list[DocumentResponse]] + +# Prompt Responses +WrappedPromptResponse = ResultsWrapper[PromptResponse] +WrappedPromptsResponse = PaginatedResultsWrapper[list[PromptResponse]] + +# System Responses +WrappedSettingsResponse = ResultsWrapper[SettingsResponse] +WrappedServerStatsResponse = ResultsWrapper[ServerStats] + +# User Responses +WrappedUserResponse = ResultsWrapper[User] +WrappedUsersResponse = PaginatedResultsWrapper[list[User]] + +# TODO: anything below this hasn't been reviewed +WrappedLogsResponse = ResultsWrapper[list[LogResponse]] +WrappedAnalyticsResponse = 
ResultsWrapper[AnalyticsResponse] WrappedVerificationResult = ResultsWrapper[VerificationResult] WrappedResetDataResult = ResultsWrapper[ResetDataResult] -WrappedConversationsOverviewResponse = PaginatedResultsWrapper[ - list[ConversationOverviewResponse] -] diff --git a/py/shared/api/models/retrieval/responses.py b/py/shared/api/models/retrieval/responses.py index 64b4c50bd..6b11b7618 100644 --- a/py/shared/api/models/retrieval/responses.py +++ b/py/shared/api/models/retrieval/responses.py @@ -2,17 +2,17 @@ from pydantic import BaseModel, Field -from shared.abstractions import KGSearchResult, Message, VectorSearchResult +from shared.abstractions import ChunkSearchResult, GraphSearchResult, Message from shared.abstractions.llm import LLMChatCompletion from shared.api.models.base import ResultsWrapper -class SearchResponse(BaseModel): - vector_search_results: list[VectorSearchResult] = Field( +class CombinedSearchResponse(BaseModel): + chunk_search_results: list[ChunkSearchResult] = Field( ..., description="List of vector search results", ) - kg_search_results: Optional[list[KGSearchResult]] = Field( + graph_search_results: Optional[list[GraphSearchResult]] = Field( None, description="Knowledge graph search results, if applicable", ) @@ -20,11 +20,11 @@ class SearchResponse(BaseModel): class Config: json_schema_extra = { "example": { - "vector_search_results": [ - VectorSearchResult.Config.json_schema_extra, + "chunk_search_results": [ + ChunkSearchResult.Config.json_schema_extra, ], - "kg_search_results": [ - KGSearchResult.Config.json_schema_extra, + "graph_search_results": [ + GraphSearchResult.Config.json_schema_extra, ], } } @@ -35,7 +35,7 @@ class RAGResponse(BaseModel): ..., description="The generated completion from the RAG process", ) - search_results: SearchResponse = Field( + search_results: CombinedSearchResponse = Field( ..., description="The search results used for the RAG process", ) @@ -58,88 +58,54 @@ class Config: ], }, "search_results": { - "vector_search_results": [ - VectorSearchResult.Config.json_schema_extra, + "chunk_search_results": [ + ChunkSearchResult.Config.json_schema_extra, ], - "kg_search_results": [ - KGSearchResult.Config.json_schema_extra, + "graph_search_results": [ + GraphSearchResult.Config.json_schema_extra, ], }, } } -class RAGAgentResponse(BaseModel): - messages: list[Message] = Field( - ..., description="List of messages in the RAG agent response" - ) +class AgentResponse(BaseModel): + messages: list[Message] = Field(..., description="Agent response messages") conversation_id: str = Field( ..., description="The conversation ID for the RAG agent response" ) class Config: json_schema_extra = { - "example": [ - { - "role": "system", - "content": "## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.", - "name": None, - "function_call": None, - "tool_calls": None, - }, - { - "role": "system", - "content": "You are a helpful assistant.", - "name": None, - "function_call": None, - "tool_calls": None, - }, - { - "role": "user", - "content": "Who is the greatest philospher of all time?", - "name": None, - "function_call": None, - "tool_calls": None, - }, - { - 
"role": "assistant", - "content": "Aristotle is widely considered the greatest philospher of all time.", - "name": None, - "function_call": None, - "tool_calls": None, - }, - { - "role": "user", - "content": "Can you tell me more about him?", - "name": None, - "function_call": None, - "tool_calls": None, - }, - { - "role": "assistant", - "content": None, - "name": None, - "function_call": { + "example": { + "messages": [ + { + "role": "assistant", + "content": None, + "name": None, + "function_call": { + "name": "search", + "arguments": '{"query":"Aristotle biography"}', + }, + "tool_calls": None, + }, + { + "role": "function", + "content": "1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n", "name": "search", - "arguments": '{"query":"Aristotle biography"}', + "function_call": None, + "tool_calls": None, }, - "tool_calls": None, - }, - { - "role": "function", - "content": "1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. 
Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n", - "name": "search", - "function_call": None, - "tool_calls": None, - }, - { - "role": "assistant", - "content": "Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. 
His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.", - "name": None, - "function_call": None, - "tool_calls": None, - }, - ] + { + "role": "assistant", + "content": "Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.", + "name": None, + "function_call": None, + "tool_calls": None, + }, + ], + "conversation_id": "a32b4c5d-6e7f-8a9b-0c1d-2e3f4a5b6c7d", + } } @@ -158,11 +124,10 @@ class DocumentSearchResult(BaseModel): ) -from ..management.responses import DocumentOverviewResponse - WrappedCompletionResponse = ResultsWrapper[LLMChatCompletion] # Create wrapped versions of the responses -WrappedSearchResponse = ResultsWrapper[SearchResponse] -WrappedDocumentSearchResponse = ResultsWrapper[list[DocumentOverviewResponse]] +WrappedVectorSearchResponse = ResultsWrapper[list[ChunkSearchResult]] +WrappedSearchResponse = ResultsWrapper[CombinedSearchResponse] +WrappedDocumentSearchResponse = ResultsWrapper[list[DocumentSearchResult]] WrappedRAGResponse = ResultsWrapper[RAGResponse] -WrappedRAGAgentResponse = ResultsWrapper[RAGAgentResponse] +WrappedAgentResponse = ResultsWrapper[AgentResponse] diff --git a/py/shared/utils/__init__.py b/py/shared/utils/__init__.py index 4a0362cbc..feb4c2ac1 100644 --- a/py/shared/utils/__init__.py +++ b/py/shared/utils/__init__.py @@ -1,16 +1,14 @@ from .base_utils import ( _decorate_vector_type, + _get_str_estimation_output, decrement_version, - format_entity_types, - format_relations, format_search_results_for_llm, format_search_results_for_stream, - generate_collection_id_from_name, generate_default_prompt_id, 
generate_default_user_collection_id, generate_document_id, generate_extraction_id, - generate_run_id, + generate_id, generate_user_id, increment_version, llm_cost_per_million_tokens, @@ -21,17 +19,14 @@ from .splitter.text import RecursiveCharacterTextSplitter, TextSplitter __all__ = [ - "format_entity_types", - "format_relations", "format_search_results_for_stream", "format_search_results_for_llm", # ID generation - "generate_run_id", + "generate_id", "generate_document_id", "generate_extraction_id", "generate_default_user_collection_id", "generate_user_id", - "generate_collection_id_from_name", "generate_default_prompt_id", # Other "increment_version", @@ -45,4 +40,5 @@ "TextSplitter", # Vector utils "_decorate_vector_type", + "_get_str_estimation_output", ] diff --git a/py/shared/utils/base_utils.py b/py/shared/utils/base_utils.py index 961c6ce6e..9680fcc9a 100644 --- a/py/shared/utils/base_utils.py +++ b/py/shared/utils/base_utils.py @@ -2,10 +2,10 @@ import json import logging from copy import deepcopy -from typing import TYPE_CHECKING, Any, AsyncGenerator, Iterable +from datetime import datetime +from typing import TYPE_CHECKING, Any, AsyncGenerator, Iterable, Optional from uuid import NAMESPACE_DNS, UUID, uuid4, uuid5 -from ..abstractions.graph import EntityType, RelationshipType from ..abstractions.search import ( AggregateSearchResult, KGCommunityResult, @@ -22,42 +22,55 @@ def format_search_results_for_llm(results: AggregateSearchResult) -> str: formatted_results = [] source_counter = 1 - if results.vector_search_results: + if results.chunk_search_results: formatted_results.append("Vector Search Results:") - for result in results.vector_search_results: + for result in results.chunk_search_results: formatted_results.extend( (f"Source [{source_counter}]:", f"{result.text}") ) source_counter += 1 - if results.kg_search_results: + if results.graph_search_results: formatted_results.append("KG Search Results:") - for kg_result in results.kg_search_results: - formatted_results.extend( - ( - f"Source [{source_counter}]:", - f"Name: {kg_result.content.name}", - ) - ) + for kg_result in results.graph_search_results: + try: + formatted_results.extend((f"Source [{source_counter}]:",)) + except AttributeError: + raise ValueError(f"Invalid KG search result: {kg_result}") + # formatted_results.extend( + # ( + # f"Source [{source_counter}]:", + # f"Type: {kg_result.content.type}", + # ) + # ) if isinstance(kg_result.content, KGCommunityResult): formatted_results.extend( ( + f"Name: {kg_result.content.name}", f"Summary: {kg_result.content.summary}", - f"Rating: {kg_result.content.rating}", - f"Rating Explanation: {kg_result.content.rating_explanation}", - "Findings:", + # f"Rating: {kg_result.content.rating}", + # f"Rating Explanation: {kg_result.content.rating_explanation}", + # "Findings:", ) ) - formatted_results.extend( - f"- {finding}" for finding in kg_result.content.findings - ) + # formatted_results.append( + # f"- {finding}" for finding in kg_result.content.findings + # ) elif isinstance( kg_result.content, - (KGEntityResult, KGRelationshipResult, KGGlobalResult), + KGEntityResult, ): + formatted_results.extend( + [ + f"Name: {kg_result.content.name}", + f"Description: {kg_result.content.description}", + ] + ) + elif isinstance(kg_result.content, KGRelationshipResult): formatted_results.append( - f"Description: {kg_result.content.description}" + f"Relationship: {kg_result.content.subject} - {kg_result.content.predicate} - {kg_result.content.object}", + # f"Description: 
{kg_result.content.description}" ) if kg_result.metadata: @@ -75,27 +88,25 @@ def format_search_results_for_llm(results: AggregateSearchResult) -> str: def format_search_results_for_stream( result: AggregateSearchResult, ) -> str: - VECTOR_SEARCH_STREAM_MARKER = ( - "search" # TODO - change this to vector_search in next major release - ) - KG_SEARCH_STREAM_MARKER = "kg_search" + CHUNK_SEARCH_STREAM_MARKER = "chunk_search" # TODO - change this to vector_search in next major release + GRAPH_SEARCH_STREAM_MARKER = "graph_search" context = "" - if result.vector_search_results: - context += f"<{VECTOR_SEARCH_STREAM_MARKER}>" + if result.chunk_search_results: + context += f"<{CHUNK_SEARCH_STREAM_MARKER}>" vector_results_list = [ - result.as_dict() for result in result.vector_search_results + result.as_dict() for result in result.chunk_search_results ] context += json.dumps(vector_results_list, default=str) - context += f"</{VECTOR_SEARCH_STREAM_MARKER}>" + context += f"</{CHUNK_SEARCH_STREAM_MARKER}>" - if result.kg_search_results: - context += f"<{KG_SEARCH_STREAM_MARKER}>" + if result.graph_search_results: + context += f"<{GRAPH_SEARCH_STREAM_MARKER}>" kg_results_list = [ - result.dict() for result in result.kg_search_results + result.dict() for result in result.graph_search_results ] context += json.dumps(kg_results_list, default=str) - context += f"</{KG_SEARCH_STREAM_MARKER}>" + context += f"</{GRAPH_SEARCH_STREAM_MARKER}>" return context @@ -104,15 +115,22 @@ def format_search_results_for_stream( from ..pipeline.base_pipeline import AsyncPipeline -def _generate_id_from_label(label: str) -> UUID: +def _generate_id_from_label(label) -> UUID: return uuid5(NAMESPACE_DNS, label) -def generate_run_id() -> UUID: +def generate_id(label: Optional[str] = None) -> UUID: """ Generates a unique run id """ - return _generate_id_from_label(str(uuid4())) + return _generate_id_from_label(label if label != None else str(uuid4())) + + +# def generate_id(label: Optional[str]= None) -> UUID: +# """ +# Generates a unique run id +# """ +# return _generate_id_from_label(str(uuid4(label))) def generate_document_id(filename: str, user_id: UUID) -> UUID: @@ -138,13 +156,6 @@ def generate_default_user_collection_id(user_id: UUID) -> UUID: return _generate_id_from_label(str(user_id)) -def generate_collection_id_from_name(collection_name: str) -> UUID: - """ - Generates a unique collection id from a given collection name - """ - return _generate_id_from_label(collection_name) - - def generate_user_id(email: str) -> UUID: """ Generates a unique user id from a given email @@ -159,6 +170,14 @@ def generate_default_prompt_id(prompt_name: str) -> UUID: return _generate_id_from_label(prompt_name) +def generate_entity_document_id() -> UUID: + """ + Generates a unique document id inserting entities into a graph + """ + generation_time = datetime.now().isoformat() + return _generate_id_from_label(f"entity-{generation_time}") + + async def to_async_generator( iterable: Iterable[Any], ) -> AsyncGenerator[Any, None]: @@ -191,16 +210,6 @@ def decrement_version(version: str) -> str: return f"{prefix}{max(0, suffix - 1)}" -def format_entity_types(entity_types: list[EntityType]) -> str: - lines = [entity.name for entity in entity_types] - return "\n".join(lines) - - -def format_relations(predicates: list[RelationshipType]) -> str: - lines = [predicate.name for predicate in predicates] - return "\n".join(lines) - - def llm_cost_per_million_tokens( model: str, input_output_ratio: float = 2 ) -> float: @@ -261,3 +270,10 @@ def _decorate_vector_type( quantization_type: VectorQuantizationType = VectorQuantizationType.FP32, ) -> str: return
f"{quantization_type.db_type}{input_str}" + + +def _get_str_estimation_output(x: tuple[Any, Any]) -> str: + if isinstance(x[0], int) and isinstance(x[1], int): + return " - ".join(map(str, x)) + else: + return " - ".join(f"{round(a, 2)}" for a in x) diff --git a/py/tests/conftest.py b/py/tests/conftest.py index af6b482dc..5c66ce9f6 100644 --- a/py/tests/conftest.py +++ b/py/tests/conftest.py @@ -20,7 +20,7 @@ VectorEntry, ) from core.base import ( - DocumentInfo, + DocumentResponse, DocumentType, IngestionConfig, IngestionStatus, @@ -59,7 +59,7 @@ def generate_random_vector_entry( vector_data = [random.random() for _ in range(dimension)] metadata = {"key": f"value_id_{id_value}", "raw_key": id_value} return VectorEntry( - extraction_id=uuid.uuid4(), + chunk_id=uuid.uuid4(), document_id=uuid.uuid4(), user_id=uuid.uuid4(), collection_ids=[uuid.uuid4()], @@ -115,17 +115,17 @@ async def postgres_db_provider( await db.upsert_entries(sample_entries) # upsert into documents_overview - document_info = DocumentInfo( + document_info = DocumentResponse( id=UUID("9fbe403b-c11c-5aae-8ade-ef22980c3ad1"), collection_ids=[UUID("122fdf6a-e116-546b-a8f6-e4cb2e2c0a09")], - user_id=UUID("00000000-0000-0000-0000-000000000003"), + owner_id=UUID("00000000-0000-0000-0000-000000000003"), document_type=DocumentType.PDF, metadata={}, title="Test Document for KG", version="1.0", size_in_bytes=1024, ingestion_status=IngestionStatus.PENDING, - kg_extraction_status=KGExtractionStatus.PENDING, + extraction_status=KGExtractionStatus.PENDING, ) await db.upsert_documents_overview(document_info) yield db diff --git a/py/tests/core/pipes/test_kg_community_summary_pipe.py b/py/tests/core/pipes/test_kg_community_summary_pipe.py index 04f519d32..9786b37db 100644 --- a/py/tests/core/pipes/test_kg_community_summary_pipe.py +++ b/py/tests/core/pipes/test_kg_community_summary_pipe.py @@ -3,14 +3,7 @@ import pytest -from core.base import ( - AsyncPipe, - Community, - CommunityReport, - Entity, - KGExtraction, - Triple, -) +from core.base import AsyncPipe, Community, Entity, KGExtraction, Relationship from core.pipes.kg.community_summary import KGCommunitySummaryPipe from shared.abstractions.vector import VectorQuantizationType @@ -53,7 +46,7 @@ def document_id(): @pytest.fixture(scope="function") -def extraction_ids(): +def chunk_ids(): return [ uuid.UUID("32ff6daf-6e67-44fa-b2a9-19384f5d9d19"), uuid.UUID("42ff6daf-6e67-44fa-b2a9-19384f5d9d19"), @@ -79,13 +72,13 @@ def embedding_vectors(embedding_dimension): @pytest.fixture(scope="function") -def entities_raw_list(document_id, extraction_ids): +def entities_raw_list(document_id, chunk_ids): return [ Entity( name="Entity1", description="Description1", category="Category1", - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, attributes={"attr1": "value1", "attr2": "value2"}, ), @@ -93,7 +86,7 @@ def entities_raw_list(document_id, extraction_ids): name="Entity2", description="Description2", category="Category2", - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, attributes={"attr3": "value3", "attr4": "value4"}, ), @@ -101,13 +94,13 @@ def entities_raw_list(document_id, extraction_ids): @pytest.fixture(scope="function") -def entities_list(extraction_ids, document_id, embedding_vectors): +def entities_list(chunk_ids, document_id, embedding_vectors): return [ Entity( id=1, name="Entity1", description="Description1", - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, 
description_embedding=embedding_vectors[0], ), @@ -115,7 +108,7 @@ def entities_list(extraction_ids, document_id, embedding_vectors): id=2, name="Entity2", description="Description2", - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, description_embedding=embedding_vectors[1], ), @@ -123,9 +116,9 @@ def entities_list(extraction_ids, document_id, embedding_vectors): @pytest.fixture(scope="function") -def triples_raw_list(embedding_vectors, extraction_ids, document_id): +def relationships_raw_list(embedding_vectors, chunk_ids, document_id): return [ - Triple( + Relationship( id=1, subject="Entity1", predicate="predicate1", @@ -133,11 +126,11 @@ def triples_raw_list(embedding_vectors, extraction_ids, document_id): weight=1.0, description="description1", embedding=embedding_vectors[0], - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, attributes={"attr1": "value1", "attr2": "value2"}, ), - Triple( + Relationship( id=2, subject="Entity2", predicate="predicate2", @@ -145,7 +138,7 @@ def triples_raw_list(embedding_vectors, extraction_ids, document_id): weight=1.0, description="description2", embedding=embedding_vectors[1], - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, attributes={"attr3": "value3", "attr4": "value4"}, ), @@ -156,24 +149,24 @@ def triples_raw_list(embedding_vectors, extraction_ids, document_id): async def test_community_summary_prompt( kg_community_summary_pipe, entities_list, - triples_raw_list, + relationships_raw_list, max_summary_input_length, ): summary = await kg_community_summary_pipe.community_summary_prompt( - entities_list, triples_raw_list, max_summary_input_length + entities_list, relationships_raw_list, max_summary_input_length ) expected_summary = """ Entity: Entity1 Descriptions: 1,Description1 - Triples: + Relationships: 1,Entity1,object1,predicate1,description1 Entity: Entity2 Descriptions: 2,Description2 - Triples: + Relationships: 2,Entity2,object2,predicate2,description2 """ - # "\n Entity: Entity1\n Descriptions: \n 1,Description1\n Triples: \n 1,Entity1,object1,predicate1,description1\n \n Entity: Entity2\n Descriptions: \n 2,Description2\n Triples: \n 2,Entity2,object2,predicate2,description2\n " + # "\n Entity: Entity1\n Descriptions: \n 1,Description1\n Relationships: \n 1,Entity1,object1,predicate1,description1\n \n Entity: Entity2\n Descriptions: \n 2,Description2\n Relationships: \n 2,Entity2,object2,predicate2,description2\n " assert summary.strip() == expected_summary.strip() diff --git a/py/tests/core/providers/database/relational/test_collection_db.py b/py/tests/core/providers/database/relational/test_collection_db.py index 924c46d49..276e2bfeb 100644 --- a/py/tests/core/providers/database/relational/test_collection_db.py +++ b/py/tests/core/providers/database/relational/test_collection_db.py @@ -2,7 +2,7 @@ import pytest -from core.base import DocumentInfo, DocumentType, R2RException +from core.base import DocumentResponse, DocumentType, R2RException from core.base.api.models import CollectionResponse @@ -109,7 +109,7 @@ async def test_assign_and_remove_document_from_collection( ) document_id = UUID("00000000-0000-0000-0000-000000000001") await temporary_postgres_db_provider.upsert_documents_overview( - DocumentInfo( + DocumentResponse( id=document_id, collection_ids=[], user_id=UUID("00000000-0000-0000-0000-000000000002"), diff --git a/py/tests/core/providers/database/relational/test_document_db.py 
b/py/tests/core/providers/database/relational/test_document_db.py index dde41db93..99764527a 100644 --- a/py/tests/core/providers/database/relational/test_document_db.py +++ b/py/tests/core/providers/database/relational/test_document_db.py @@ -4,7 +4,7 @@ import pytest from core.base import ( - DocumentInfo, + DocumentResponse, DocumentType, IngestionStatus, KGEnrichmentStatus, @@ -20,7 +20,7 @@ @pytest.mark.asyncio async def test_upsert_documents_overview(temporary_postgres_db_provider): - document_info = DocumentInfo( + document_info = DocumentResponse( id=UUID("00000000-0000-0000-0000-000000000001"), collection_ids=[UUID("00000000-0000-0000-0000-000000000002")], user_id=UUID("00000000-0000-0000-0000-000000000003"), @@ -30,7 +30,7 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=1024, ingestion_status=IngestionStatus.PENDING, - kg_extraction_status=KGExtractionStatus.PENDING, + extraction_status=KGExtractionStatus.PENDING, ) await temporary_postgres_db_provider.upsert_documents_overview( document_info @@ -52,8 +52,7 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): assert inserted_document.size_in_bytes == document_info.size_in_bytes assert inserted_document.ingestion_status == document_info.ingestion_status assert ( - inserted_document.kg_extraction_status - == document_info.kg_extraction_status + inserted_document.extraction_status == document_info.extraction_status ) # Update the document and verify the changes @@ -74,7 +73,7 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): @pytest.mark.asyncio async def test_delete_from_documents_overview(temporary_postgres_db_provider): - document_info = DocumentInfo( + document_info = DocumentResponse( id=UUID("00000000-0000-0000-0000-000000000001"), collection_ids=[UUID("00000000-0000-0000-0000-000000000002")], user_id=UUID("00000000-0000-0000-0000-000000000003"), @@ -84,7 +83,7 @@ async def test_delete_from_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=1024, ingestion_status=IngestionStatus.PENDING, - kg_extraction_status=KGExtractionStatus.PENDING, + extraction_status=KGExtractionStatus.PENDING, ) await temporary_postgres_db_provider.upsert_documents_overview( document_info @@ -103,7 +102,7 @@ async def test_delete_from_documents_overview(temporary_postgres_db_provider): @pytest.mark.asyncio async def test_get_documents_overview(temporary_postgres_db_provider): - document_info1 = DocumentInfo( + document_info1 = DocumentResponse( id=UUID("00000000-0000-0000-0000-000000000001"), collection_ids=[UUID("00000000-0000-0000-0000-000000000002")], user_id=UUID("00000000-0000-0000-0000-000000000003"), @@ -113,9 +112,9 @@ async def test_get_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=1024, ingestion_status=IngestionStatus.PENDING, - kg_extraction_status=KGExtractionStatus.PENDING, + extraction_status=KGExtractionStatus.PENDING, ) - document_info2 = DocumentInfo( + document_info2 = DocumentResponse( id=UUID("00000000-0000-0000-0000-000000000004"), collection_ids=[UUID("00000000-0000-0000-0000-000000000002")], user_id=UUID("00000000-0000-0000-0000-000000000003"), @@ -125,7 +124,7 @@ async def test_get_documents_overview(temporary_postgres_db_provider): version="1.0", size_in_bytes=2048, ingestion_status=IngestionStatus.SUCCESS, - kg_extraction_status=KGExtractionStatus.PENDING, + extraction_status=KGExtractionStatus.PENDING, ) await 
temporary_postgres_db_provider.upsert_documents_overview( [document_info1, document_info2] diff --git a/py/tests/core/providers/database/relational/test_user_db.py b/py/tests/core/providers/database/relational/test_user_db.py index 2388eef90..3d4cb8aa4 100644 --- a/py/tests/core/providers/database/relational/test_user_db.py +++ b/py/tests/core/providers/database/relational/test_user_db.py @@ -4,7 +4,7 @@ import pytest -from core.base.api.models import UserResponse +from core.base.api.models import User @pytest.mark.asyncio @@ -12,7 +12,7 @@ async def test_create_user(temporary_postgres_db_provider): user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - assert isinstance(user, UserResponse) + assert isinstance(user, User) assert user.email == "test@example.com" @@ -57,7 +57,7 @@ async def test_update_user(temporary_postgres_db_provider): user = await temporary_postgres_db_provider.create_user( "test@example.com", "password" ) - updated_user = UserResponse( + updated_user = User( id=user.id, email="updated@example.com", is_superuser=True, diff --git a/py/tests/core/providers/database/test_vector_db_provider.py b/py/tests/core/providers/database/test_vector_db_provider.py index ff7a70485..237bb97e0 100644 --- a/py/tests/core/providers/database/test_vector_db_provider.py +++ b/py/tests/core/providers/database/test_vector_db_provider.py @@ -1,7 +1,7 @@ import pytest from core.providers.database import PostgresDBProvider -from r2r import VectorSearchSettings +from r2r import ChunkSearchSettings @pytest.mark.asyncio @@ -15,7 +15,7 @@ async def test_search_equality_filter(postgres_db_provider, sample_entries): query_vector = sample_entries[0] results = await postgres_db_provider.semantic_search( query_vector.vector.data, - VectorSearchSettings( + ChunkSearchSettings( search_limit=10, filters={"key": {"$eq": "value_id_0"}} ), ) @@ -28,7 +28,7 @@ async def test_search_not_equal_filter(postgres_db_provider, sample_entries): query_vector = sample_entries[0] results = await postgres_db_provider.semantic_search( query_vector.vector.data, - VectorSearchSettings( + ChunkSearchSettings( search_limit=100, filters={"key": {"$ne": "value_id_0"}} ), ) @@ -43,7 +43,7 @@ async def test_search_greater_than_filter( query_vector = sample_entries[0] results = await postgres_db_provider.semantic_search( query_vector.vector.data, - VectorSearchSettings( + ChunkSearchSettings( search_limit=100, filters={"raw_key": {"$gt": 50}} ), ) @@ -58,7 +58,7 @@ async def test_search_less_than_or_equal_filter( query_vector = sample_entries[0] results = await postgres_db_provider.semantic_search( query_vector.vector.data, - VectorSearchSettings( + ChunkSearchSettings( search_limit=10, filters={"raw_key": {"$lte": 20}}, ef_search=100, # TODO - Better understand why we need to set this to search the entire database. 
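As a usage illustration of the renamed search settings, here is a minimal sketch of the metadata filter syntax these tests exercise. ChunkSearchSettings, the search_limit/ef_search/filters fields, the $eq/$lte/$and operators, and the semantic_search call are taken from the hunks above; the concrete values and the placeholder query vector are assumptions for illustration only.

from r2r import ChunkSearchSettings

# Combine an exact-match filter with a numeric range filter, as the
# test_search_complex_and_filter and test_search_less_than_or_equal_filter cases do.
settings = ChunkSearchSettings(
    search_limit=10,
    ef_search=100,  # as in the tests above, so the search covers more of the table
    filters={
        "$and": [
            {"key": {"$eq": "value_id_0"}},  # metadata equality
            {"raw_key": {"$lte": 20}},       # metadata numeric comparison
        ]
    },
)

# Hypothetical call site, mirroring the tests:
# results = await postgres_db_provider.semantic_search(query_vector.vector.data, settings)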
@@ -68,7 +68,7 @@ async def test_search_less_than_or_equal_filter( results = await postgres_db_provider.semantic_search( query_vector.vector.data, - VectorSearchSettings( + ChunkSearchSettings( search_limit=100, filters={"raw_key": {"$lte": 20}} ), ) @@ -81,7 +81,7 @@ async def test_search_in_filter(postgres_db_provider, sample_entries): query_vector = sample_entries[0] results = await postgres_db_provider.semantic_search( query_vector.vector.data, - VectorSearchSettings( + ChunkSearchSettings( search_limit=10, filters={"key": {"$in": ["value_id_0", "value_id_1"]}}, ), @@ -97,7 +97,7 @@ async def test_search_complex_and_filter(postgres_db_provider, sample_entries): query_vector = sample_entries[0] results = await postgres_db_provider.semantic_search( query_vector.vector.data, - VectorSearchSettings( + ChunkSearchSettings( search_limit=10, filters={ "$and": [ @@ -117,7 +117,7 @@ async def test_search_complex_or_filter(postgres_db_provider, sample_entries): query_vector = sample_entries[0] results = await postgres_db_provider.semantic_search( query_vector.vector.data, - VectorSearchSettings( + ChunkSearchSettings( search_limit=11, ef_search=100, # TODO - Better understand why we need to set this to search the entire database. filters={ @@ -140,7 +140,7 @@ async def test_search_nested_and_or_filters( query_vector = sample_entries[0] results = await postgres_db_provider.semantic_search( query_vector.vector.data, - VectorSearchSettings( + ChunkSearchSettings( search_limit=10, ef_search=100, # TODO - Better understand why we need to set this to search the entire database. filters={ @@ -169,7 +169,7 @@ async def test_delete_equality(temporary_postgres_db_provider, sample_entries): assert len(deleted_ids) == 1 remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, - VectorSearchSettings(search_limit=100), + ChunkSearchSettings(search_limit=100), ) assert len(remaining) == 99 assert all(r.metadata["key"] != "value_id_0" for r in remaining) @@ -185,7 +185,7 @@ async def test_delete_greater_than( assert len(deleted_ids) == 9 remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, - VectorSearchSettings(search_limit=100), + ChunkSearchSettings(search_limit=100), ) assert len(remaining) == 91 assert all(int(r.text.split("_")[-1]) <= 90 for r in remaining) @@ -199,7 +199,7 @@ async def test_delete_in(temporary_postgres_db_provider, sample_entries): assert len(deleted_ids) == 2 remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, - VectorSearchSettings(search_limit=100), + ChunkSearchSettings(search_limit=100), ) assert len(remaining) == 98 assert all( @@ -223,7 +223,7 @@ async def test_delete_complex_and( assert len(deleted_ids) == 1 remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, - VectorSearchSettings(search_limit=100), + ChunkSearchSettings(search_limit=100), ) assert len(remaining) == 99 assert not any( @@ -247,7 +247,7 @@ async def test_delete_complex_or( assert len(deleted_ids) == 11 remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, - VectorSearchSettings(search_limit=100), + ChunkSearchSettings(search_limit=100), ) assert len(remaining) == 89 assert all( @@ -276,7 +276,7 @@ async def test_delete_nested_and_or( assert len(deleted_ids) == 1 remaining = await temporary_postgres_db_provider.semantic_search( sample_entries[0].vector.data, - VectorSearchSettings(search_limit=100), + 
ChunkSearchSettings(search_limit=100), ) assert len(remaining) == 99 assert not any( diff --git a/py/tests/core/providers/database/test_vector_index_logic.py b/py/tests/core/providers/database/test_vector_index_logic.py index 5c2f5827f..eae97edf9 100644 --- a/py/tests/core/providers/database/test_vector_index_logic.py +++ b/py/tests/core/providers/database/test_vector_index_logic.py @@ -18,7 +18,7 @@ async def test_index_lifecycle(postgres_db_provider): # Create an index index_name = f"test_index_{uuid4().hex[:8]}" await postgres_db_provider.create_index( - table_name=VectorTableName.VECTORS, + table_name=VectorTableName.CHUNKS, index_measure=IndexMeasure.cosine_distance, index_method=IndexMethod.hnsw, index_name=index_name, @@ -26,26 +26,26 @@ async def test_index_lifecycle(postgres_db_provider): ) # List indices and verify our index exists - indices = await postgres_db_provider.list_indices(VectorTableName.VECTORS) + indices = await postgres_db_provider.list_indices(VectorTableName.CHUNKS) print("indices = ", indices) assert indices, "No indices returned" assert any(index["name"] == index_name for index in indices) # # Select the index for use # await postgres_db_provider.select_index( - # index_name, VectorTableName.VECTORS + # index_name, VectorTableName.CHUNKS # ) # Delete the index await postgres_db_provider.delete_index( index_name, - table_name=VectorTableName.VECTORS, + table_name=VectorTableName.CHUNKS, concurrently=False, # Consistent with creation ) # Verify index was deleted indices_after = await postgres_db_provider.list_indices( - VectorTableName.VECTORS + VectorTableName.CHUNKS ) assert not any(index["name"] == index_name for index in indices_after) @@ -57,7 +57,7 @@ async def test_multiple_index_types(postgres_db_provider): # Create HNSW index hnsw_name = f"hnsw_index_{uuid4().hex[:8]}" await postgres_db_provider.create_index( - table_name=VectorTableName.VECTORS, + table_name=VectorTableName.CHUNKS, index_measure=IndexMeasure.cosine_distance, index_method=IndexMethod.hnsw, index_name=hnsw_name, @@ -68,7 +68,7 @@ async def test_multiple_index_types(postgres_db_provider): # Create IVF-Flat index ivf_name = f"ivf_index_{uuid4().hex[:8]}" await postgres_db_provider.create_index( - table_name=VectorTableName.VECTORS, + table_name=VectorTableName.CHUNKS, index_measure=IndexMeasure.cosine_distance, index_method=IndexMethod.ivfflat, index_name=ivf_name, @@ -77,16 +77,16 @@ async def test_multiple_index_types(postgres_db_provider): ) # List indices and verify both exist - indices = await postgres_db_provider.list_indices(VectorTableName.VECTORS) + indices = await postgres_db_provider.list_indices(VectorTableName.CHUNKS) assert any(index["name"] == hnsw_name for index in indices) assert any(index["name"] == ivf_name for index in indices) # Clean up await postgres_db_provider.delete_index( - hnsw_name, table_name=VectorTableName.VECTORS, concurrently=False + hnsw_name, table_name=VectorTableName.CHUNKS, concurrently=False ) await postgres_db_provider.delete_index( - ivf_name, table_name=VectorTableName.VECTORS, concurrently=False + ivf_name, table_name=VectorTableName.CHUNKS, concurrently=False ) @@ -101,13 +101,13 @@ async def test_index_operations_invalid_inputs(postgres_db_provider): # Try to delete non-existent index with pytest.raises(Exception): await postgres_db_provider.delete_index( - "nonexistent_index", VectorTableName.VECTORS + "nonexistent_index", VectorTableName.CHUNKS ) # Try to select non-existent index # with pytest.raises(Exception): # await 
postgres_db_provider.select_index( - # "nonexistent_index", VectorTableName.VECTORS + # "nonexistent_index", VectorTableName.CHUNKS # ) @@ -120,7 +120,7 @@ async def test_index_persistence( # Create index using first connection index_name = f"persist_test_{uuid4().hex[:8]}" await postgres_db_provider.create_index( - table_name=VectorTableName.VECTORS, + table_name=VectorTableName.CHUNKS, index_measure=IndexMeasure.cosine_distance, index_method=IndexMethod.hnsw, index_name=index_name, @@ -129,11 +129,11 @@ async def test_index_persistence( # Verify index exists using second connection indices = await temporary_postgres_db_provider.list_indices( - VectorTableName.VECTORS + VectorTableName.CHUNKS ) assert any(index["name"] == index_name for index in indices) # Clean up await postgres_db_provider.delete_index( - index_name, table_name=VectorTableName.VECTORS, concurrently=False + index_name, table_name=VectorTableName.CHUNKS, concurrently=False ) diff --git a/py/tests/core/providers/email/test_email_providers.py b/py/tests/core/providers/email/test_email_providers.py index 583f1b0fb..df744eb71 100644 --- a/py/tests/core/providers/email/test_email_providers.py +++ b/py/tests/core/providers/email/test_email_providers.py @@ -1,4 +1,5 @@ import pytest + from core.base.providers.email import EmailConfig from core.providers.email import SendGridEmailProvider diff --git a/py/tests/core/providers/ingestion/test_contextual_embedding.py b/py/tests/core/providers/ingestion/test_contextual_embedding.py index 6bbb75be3..97d8fb047 100644 --- a/py/tests/core/providers/ingestion/test_contextual_embedding.py +++ b/py/tests/core/providers/ingestion/test_contextual_embedding.py @@ -22,7 +22,7 @@ ChunkEnrichmentSettings, ChunkEnrichmentStrategy, ) -from shared.api.models.auth.responses import UserResponse +from shared.api.models.auth.responses import User @pytest.fixture @@ -32,7 +32,7 @@ def sample_document_id(): @pytest.fixture def sample_user(): - return UserResponse( + return User( id=UUID("87654321-8765-4321-8765-432187654321"), email="test@example.com", is_superuser=True, @@ -45,7 +45,7 @@ def collection_ids(): @pytest.fixture -def extraction_ids(): +def chunk_ids(): return [ UUID("fce959df-46a2-4983-aa8b-dd1f93777e02"), UUID("9a85269c-84cd-4dff-bf21-7bd09974f668"), @@ -54,12 +54,10 @@ def extraction_ids(): @pytest.fixture -def sample_chunks( - sample_document_id, sample_user, collection_ids, extraction_ids -): +def sample_chunks(sample_document_id, sample_user, collection_ids, chunk_ids): return [ VectorEntry( - extraction_id=extraction_ids[0], + chunk_id=chunk_ids[0], document_id=sample_document_id, user_id=sample_user.id, collection_ids=collection_ids, @@ -72,7 +70,7 @@ def sample_chunks( metadata={"chunk_order": 0}, ), VectorEntry( - extraction_id=extraction_ids[1], + chunk_id=chunk_ids[1], document_id=sample_document_id, user_id=sample_user.id, collection_ids=collection_ids, @@ -85,7 +83,7 @@ def sample_chunks( metadata={"chunk_order": 1}, ), VectorEntry( - extraction_id=extraction_ids[2], + chunk_id=chunk_ids[2], document_id=sample_document_id, user_id=sample_user.id, collection_ids=collection_ids, @@ -188,15 +186,15 @@ async def test_chunk_enrichment_basic( await ingestion_service.chunk_enrichment(sample_document_id) # document chunks - document_chunks = ( - await ingestion_service.providers.database.get_document_chunks( + list_document_chunks = ( + await ingestion_service.providers.database.list_document_chunks( sample_document_id ) ) - assert len(document_chunks["results"]) == len(sample_chunks) + 
assert len(list_document_chunks["results"]) == len(sample_chunks) - for document_chunk in document_chunks["results"]: + for document_chunk in list_document_chunks["results"]: assert ( document_chunk["metadata"]["chunk_enrichment_status"] == "success" ) @@ -228,4 +226,4 @@ async def test_chunk_enrichment_basic( # Creates 200 RawChunks ("Chunk number {0-199}"), ingests and enriches them all to verify concurrent processing handles large batch correctly # test_vector_storage: -# Ingests chunks, enriches them, then verifies get_document_vectors() returns vectors with correct structure including vector data and extraction_id fields +# Ingests chunks, enriches them, then verifies get_document_vectors() returns vectors with correct structure including vector data and chunk_id fields diff --git a/py/tests/core/providers/kg/test_kg_logic.py b/py/tests/core/providers/kg/test_kg_logic.py index 41378b438..59d4d4596 100644 --- a/py/tests/core/providers/kg/test_kg_logic.py +++ b/py/tests/core/providers/kg/test_kg_logic.py @@ -4,7 +4,7 @@ import pytest -from core.base import Community, CommunityReport, Entity, KGExtraction, Triple +from core.base import Community, Entity, KGExtraction, Relationship from shared.abstractions.vector import VectorQuantizationType @@ -19,7 +19,7 @@ def document_id(): @pytest.fixture(scope="function") -def extraction_ids(): +def chunk_ids(): return [ uuid.UUID("32ff6daf-6e67-44fa-b2a9-19384f5d9d19"), uuid.UUID("42ff6daf-6e67-44fa-b2a9-19384f5d9d19"), @@ -45,13 +45,13 @@ def embedding_vectors(embedding_dimension): @pytest.fixture(scope="function") -def entities_raw_list(document_id, extraction_ids): +def entities_raw_list(document_id, chunk_ids): return [ Entity( name="Entity1", description="Description1", category="Category1", - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, attributes={"attr1": "value1", "attr2": "value2"}, ), @@ -59,7 +59,7 @@ def entities_raw_list(document_id, extraction_ids): name="Entity2", description="Description2", category="Category2", - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, attributes={"attr3": "value3", "attr4": "value4"}, ), @@ -67,19 +67,19 @@ def entities_raw_list(document_id, extraction_ids): @pytest.fixture(scope="function") -def entities_list(extraction_ids, document_id, embedding_vectors): +def entities_list(chunk_ids, document_id, embedding_vectors): return [ Entity( name="Entity1", description="Description1", - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, description_embedding=embedding_vectors[0], ), Entity( name="Entity2", description="Description2", - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, description_embedding=embedding_vectors[1], ), @@ -87,27 +87,27 @@ def entities_list(extraction_ids, document_id, embedding_vectors): @pytest.fixture(scope="function") -def triples_raw_list(embedding_vectors, extraction_ids, document_id): +def relationships_raw_list(embedding_vectors, chunk_ids, document_id): return [ - Triple( + Relationship( subject="Entity1", predicate="predicate1", object="object1", weight=1.0, description="description1", embedding=embedding_vectors[0], - extraction_ids=extraction_ids, + chunk_ids=chunk_ids, document_id=document_id, attributes={"attr1": "value1", "attr2": "value2"}, ), - Triple( + Relationship( subject="Entity2", predicate="predicate2", object="object2", weight=1.0, description="description2", embedding=embedding_vectors[1], - extraction_ids=extraction_ids, + 
chunk_ids=chunk_ids, document_id=document_id, attributes={"attr3": "value3", "attr4": "value4"}, ), @@ -115,19 +115,19 @@ def triples_raw_list(embedding_vectors, extraction_ids, document_id): @pytest.fixture(scope="function") -def communities_list(entities_list, triples_raw_list): +def communities_list(entities_list, relationships_raw_list): return [ Community( name="Community1", description="Description1", entities=[entities_list[0]], - triples=[triples_raw_list[0]], + relationships=[relationships_raw_list[0]], ), Community( name="Community2", description="Description2", entities=[entities_list[1]], - triples=[triples_raw_list[1]], + relationships=[relationships_raw_list[1]], ), ] @@ -142,22 +142,20 @@ def community_table_info(collection_id): @pytest.fixture(scope="function") def kg_extractions( - extraction_ids, entities_raw_list, triples_raw_list, document_id + chunk_ids, entities_raw_list, relationships_raw_list, document_id ): return [ KGExtraction( - extraction_ids=extraction_ids, - entities=entities_raw_list, - triples=triples_raw_list, + relationships=relationships_raw_list, document_id=document_id, ) ] @pytest.fixture(scope="function") -def community_report_list(embedding_vectors, collection_id): +def community_list(embedding_vectors, collection_id): return [ - CommunityReport( + Community( community_number=1, level=0, collection_id=collection_id, @@ -168,7 +166,7 @@ def community_report_list(embedding_vectors, collection_id): findings=["Findings of the community report"], embedding=embedding_vectors[0], ), - CommunityReport( + Community( community_number=2, level=0, collection_id=collection_id, @@ -193,8 +191,8 @@ async def test_create_tables( "entities": [], "total_entries": 0, } - assert await postgres_db_provider.get_triples(collection_id) == { - "triples": [], + assert await postgres_db_provider.get_relationships(collection_id) == { + "relationships": [], "total_entries": 0, } assert await postgres_db_provider.get_communities(collection_id) == { @@ -223,11 +221,11 @@ async def test_add_entities_raw( async def test_add_entities( postgres_db_provider, entities_list, collection_id ): - await postgres_db_provider.add_entities( - entities_list, table_name="document_entity" + await postgres_db_provider.graph_handler.add_entities( + entities_list, table_name="entity" ) entities = await postgres_db_provider.get_entities( - collection_id, entity_table_name="document_entity" + collection_id, entity_table_name="entity" ) assert entities["entities"][0].name == "Entity1" assert entities["entities"][1].name == "Entity2" @@ -236,47 +234,25 @@ async def test_add_entities( @pytest.mark.asyncio -async def test_add_triples( - postgres_db_provider, triples_raw_list, collection_id +async def test_add_relationships( + postgres_db_provider, relationships_raw_list, collection_id ): - await postgres_db_provider.add_triples( - triples_raw_list, table_name="chunk_triple" - ) - triples = await postgres_db_provider.get_triples(collection_id) - assert triples["triples"][0].subject == "Entity1" - assert triples["triples"][1].subject == "Entity2" - assert len(triples["triples"]) == 2 - assert triples["total_entries"] == 2 - - -@pytest.mark.asyncio -async def test_add_kg_extractions( - postgres_db_provider, kg_extractions, collection_id -): - added_extractions = await postgres_db_provider.add_kg_extractions( - kg_extractions, table_prefix="chunk_" - ) - - assert added_extractions == (2, 2) - - entities = await postgres_db_provider.get_entities( - collection_id, entity_table_name="chunk_entity" + await 
postgres_db_provider.graph_handler.add_relationships( + relationships_raw_list, table_name="relationship" ) - assert entities["entities"][0].name == "Entity1" - assert entities["entities"][1].name == "Entity2" - assert len(entities["entities"]) == 2 - assert entities["total_entries"] == 2 - - triples = await postgres_db_provider.get_triples(collection_id) - assert triples["triples"][0].subject == "Entity1" - assert triples["triples"][1].subject == "Entity2" - assert len(triples["triples"]) == 2 - assert triples["total_entries"] == 2 + relationships = await postgres_db_provider.get_relationships(collection_id) + assert relationships["relationships"][0].subject == "Entity1" + assert relationships["relationships"][1].subject == "Entity2" + assert len(relationships["relationships"]) == 2 + assert relationships["total_entries"] == 2 @pytest.mark.asyncio async def test_get_entity_map( - postgres_db_provider, entities_raw_list, triples_raw_list, document_id + postgres_db_provider, + entities_raw_list, + relationships_raw_list, + document_id, ): await postgres_db_provider.add_entities( entities_raw_list, table_name="chunk_entity" @@ -285,33 +261,35 @@ async def test_get_entity_map( assert entity_map["Entity1"]["entities"][0].name == "Entity1" assert entity_map["Entity2"]["entities"][0].name == "Entity2" - await postgres_db_provider.add_triples(triples_raw_list) + await postgres_db_provider.graph_handler.add_relationships( + relationships_raw_list + ) entity_map = await postgres_db_provider.get_entity_map(0, 2, document_id) assert entity_map["Entity1"]["entities"][0].name == "Entity1" assert entity_map["Entity2"]["entities"][0].name == "Entity2" - assert entity_map["Entity1"]["triples"][0].subject == "Entity1" - assert entity_map["Entity2"]["triples"][0].subject == "Entity2" + assert entity_map["Entity1"]["relationships"][0].subject == "Entity1" + assert entity_map["Entity2"]["relationships"][0].subject == "Entity2" @pytest.mark.asyncio async def test_upsert_embeddings( postgres_db_provider, collection_id, entities_list ): - table_name = "document_entity" + table_name = "entity" entities_list_to_upsert = [ ( entity.name, entity.description, str(entity.description_embedding), - entity.extraction_ids, + entity.chunk_ids, entity.document_id, ) for entity in entities_list ] - await postgres_db_provider.upsert_embeddings( + await postgres_db_provider.add_entities( entities_list_to_upsert, table_name ) @@ -323,22 +301,24 @@ async def test_upsert_embeddings( @pytest.mark.asyncio -async def test_get_all_triples( - postgres_db_provider, collection_id, triples_raw_list +async def test_get_all_relationships( + postgres_db_provider, collection_id, relationships_raw_list ): - await postgres_db_provider.add_triples(triples_raw_list) - triples = await postgres_db_provider.get_triples(collection_id) - assert triples["triples"][0].subject == "Entity1" - assert triples["triples"][1].subject == "Entity2" - assert len(triples["triples"]) == 2 + await postgres_db_provider.graph_handler.add_relationships( + relationships_raw_list + ) + relationships = await postgres_db_provider.get_relationships(collection_id) + assert relationships["relationships"][0].subject == "Entity1" + assert relationships["relationships"][1].subject == "Entity2" + assert len(relationships["relationships"]) == 2 @pytest.mark.asyncio async def test_get_communities( - postgres_db_provider, collection_id, community_report_list + postgres_db_provider, collection_id, community_list ): - await 
postgres_db_provider.add_community_report(community_report_list[0]) - await postgres_db_provider.add_community_report(community_report_list[1]) + await postgres_db_provider.add_community(community_list[0]) + await postgres_db_provider.add_community(community_list[1]) communities = await postgres_db_provider.get_communities(collection_id) assert communities["communities"][0].name == "Community Report 1" assert len(communities["communities"]) == 2 @@ -360,15 +340,13 @@ async def test_perform_graph_clustering( collection_id, leiden_params_1, entities_list, - triples_raw_list, + relationships_raw_list, ): - # addd entities and triples - await postgres_db_provider.add_entities( - entities_list, table_name="document_entity" - ) - await postgres_db_provider.add_triples( - triples_raw_list, table_name="chunk_triple" + # addd entities and relationships + await postgres_db_provider.add_entities(entities_list, table_name="entity") + await postgres_db_provider.graph_handler.add_relationships( + relationships_raw_list, table_name="relationship" ) num_communities = await postgres_db_provider.perform_graph_clustering( @@ -381,22 +359,20 @@ async def test_perform_graph_clustering( async def test_get_community_details( postgres_db_provider, entities_list, - triples_raw_list, + relationships_raw_list, collection_id, - community_report_list, + community_list, community_table_info, ): - await postgres_db_provider.add_entities( - entities_list, table_name="document_entity" - ) - await postgres_db_provider.add_triples( - triples_raw_list, table_name="chunk_triple" + await postgres_db_provider.add_entities(entities_list, table_name="entity") + await postgres_db_provider.graph_handler.add_relationships( + relationships_raw_list, table_name="relationship" ) await postgres_db_provider.add_community_info(community_table_info) - await postgres_db_provider.add_community_report(community_report_list[0]) + await postgres_db_provider.add_community(community_list[0]) - community_level, entities, triples = ( + community_level, entities, relationships = ( await postgres_db_provider.get_community_details( community_number=1, collection_id=collection_id ) @@ -405,4 +381,4 @@ async def test_get_community_details( assert community_level == 0 # TODO: change these to objects assert entities[0].name == "Entity1" - assert triples[0].subject == "Entity1" + assert relationships[0].subject == "Entity1" diff --git a/py/tests/core/providers/logging/test_logging_provider.py b/py/tests/core/providers/logging/test_logging_provider.py index 5e2cee8c6..aa9441ae3 100644 --- a/py/tests/core/providers/logging/test_logging_provider.py +++ b/py/tests/core/providers/logging/test_logging_provider.py @@ -8,7 +8,7 @@ from core import ( PersistentLoggingConfig, SqlitePersistentLoggingProvider, - generate_run_id, + generate_id, ) logger = logging.getLogger() @@ -16,7 +16,7 @@ @pytest.mark.asyncio async def test_logging(local_logging_provider): - run_id = generate_run_id() + run_id = generate_id() await local_logging_provider.log(run_id, "key", "value") logs = await local_logging_provider.get_logs([run_id]) assert len(logs) == 1 @@ -25,7 +25,7 @@ async def test_logging(local_logging_provider): async def test_multiple_log_entries(local_logging_provider): - run_ids = [generate_run_id() for _ in range(3)] + run_ids = [generate_id() for _ in range(3)] entries = [ (run_id, f"key_{i}", f"value_{i}") for i, run_id in enumerate(run_ids) ] @@ -70,7 +70,7 @@ async def test_multiple_log_entries(local_logging_provider): async def 
test_log_retrieval_limit(local_logging_provider): run_ids = [] for i in range(10): - run_ids.append(generate_run_id()) + run_ids.append(generate_id()) await local_logging_provider.log(run_ids[-1], f"key_{i}", f"value_{i}") logs = await local_logging_provider.get_logs(run_ids[:5]) @@ -78,7 +78,7 @@ async def test_log_retrieval_limit(local_logging_provider): async def test_specific_run_type_retrieval(local_logging_provider): - run_id_0, run_id_1 = generate_run_id(), generate_run_id() + run_id_0, run_id_1 = generate_id(), generate_id() await local_logging_provider.log(run_id_0, "run_type", "RETRIEVAL") await local_logging_provider.log(run_id_0, "key_0", "value_0") @@ -117,7 +117,7 @@ async def test_specific_run_type_retrieval(local_logging_provider): @pytest.mark.asyncio async def test_info_logging(local_logging_provider): - run_id = generate_run_id() + run_id = generate_id() user_id = uuid.uuid4() run_type = "RETRIEVAL" await local_logging_provider.info_log(run_id, run_type, user_id) @@ -132,10 +132,10 @@ async def test_info_logging(local_logging_provider): async def test_get_info_logs_with_user_filter(local_logging_provider): user_id_1, user_id_2 = uuid.uuid4(), uuid.uuid4() await local_logging_provider.info_log( - generate_run_id(), "RETRIEVAL", user_id_1 + generate_id(), "RETRIEVAL", user_id_1 ) await local_logging_provider.info_log( - generate_run_id(), "MANAGEMENT", user_id_2 + generate_id(), "MANAGEMENT", user_id_2 ) info_logs = await local_logging_provider.get_info_logs( diff --git a/py/tests/integration/local_harness.py b/py/tests/integration/local_harness.py index 859071021..26eff6a3e 100644 --- a/py/tests/integration/local_harness.py +++ b/py/tests/integration/local_harness.py @@ -7,7 +7,6 @@ import traceback from dataclasses import dataclass from datetime import datetime -from typing import Dict, List from colorama import Fore, Style, init @@ -17,7 +16,7 @@ class TestResult: name: str passed: bool duration: float - error: Dict = None + error: dict class TestRunner: @@ -109,7 +108,7 @@ def _setup_logger(self): logger.addHandler(ch) return logger - def run_all_categories(self) -> Dict[str, List[TestResult]]: + def run_all_categories(self) -> dict[str, list[TestResult]]: all_results = {} for category in self.test_sequences.keys(): self.logger.info( @@ -119,7 +118,7 @@ def run_all_categories(self) -> Dict[str, List[TestResult]]: all_results[category] = results return all_results - def run_test_category(self, category: str) -> List[TestResult]: + def run_test_category(self, category: str) -> list[TestResult]: results = [] try: module = importlib.import_module( @@ -177,7 +176,7 @@ def run_test_category(self, category: str) -> List[TestResult]: self._print_summary(results) return results - def _save_results(self, results: List[TestResult], category: str = None): + def _save_results(self, results: list[TestResult], category: str = None): output = { "timestamp": datetime.now().isoformat(), "category": category, @@ -197,7 +196,7 @@ def _save_results(self, results: List[TestResult], category: str = None): with open(self.results_file, "w") as f: json.dump(output, f, indent=2) - def _print_summary(self, results: List[TestResult]): + def _print_summary(self, results: list[TestResult]): total = len(results) passed = sum(1 for r in results if r.passed) failed = total - passed diff --git a/py/tests/integration/runner_cli.py b/py/tests/integration/runner_cli.py index ab63bfe59..d1693ed89 100644 --- a/py/tests/integration/runner_cli.py +++ b/py/tests/integration/runner_cli.py @@ -80,17 +80,30 @@ def 
compare_document_fields(documents, expected_doc): def test_document_overview_sample_file_cli(): print("Testing: Document overview contains 'aristotle.txt'") output = run_command("poetry run r2r documents-overview") - output = output.replace("'", '"').replace( - "None", "null" - ) # Replace Python None with JSON null - output_lines = output.strip().split("\n")[1:] - documents = [json.loads(ele) for ele in output_lines] + + # Skip non-JSON lines and find the JSON content + output_lines = output.strip().split("\n") + json_lines = [ + line for line in output_lines if line.strip().startswith("{") + ] + + documents = [] + for line in json_lines: + try: + # Replace Python None with JSON null and single quotes with double quotes + json_str = line.replace("'", '"').replace(": None", ": null") + doc = json.loads(json_str) + documents.append(doc) + except json.JSONDecodeError as e: + print(f"Failed to parse JSON: {e}") + print(f"Problem line: {line}") + continue aristotle_document = { "title": "aristotle.txt", "document_type": "txt", "ingestion_status": "success", - "kg_extraction_status": "pending", + "extraction_status": "pending", "version": "v0", "metadata": {"title": "aristotle.txt", "version": "v0"}, } @@ -101,15 +114,6 @@ def test_document_overview_sample_file_cli(): print("All documents:", documents) sys.exit(1) - # # Check if any document in the overview matches the Aristotle document - # if not any( - # all(doc.get(k) == v for k, v in aristotle_document.items()) - # for doc in documents - # ): - # print("Document overview test failed") - # print("Aristotle document not found in the overview") - # print("Documents:", documents) - # sys.exit(1) print("Document overview test passed") print("~" * 100) @@ -174,7 +178,7 @@ def test_vector_search_sample_file_filter_cli(): expected_lead_search_result = { "text": "Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.", - "extraction_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c", + "chunk_id": "ff8accdb-791e-5b6d-a83a-5adc32c4222c", "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", # "score": lambda x: 0.77 <= x <= 0.79, @@ -211,7 +215,7 @@ def test_hybrid_search_sample_file_filter_cli(): # lead_result = results[0] # expected_lead_search_result = { # "text": "Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n\nAristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. 
While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent some time within the Macedonian palace, making his first connections with the Macedonian monarchy.[7]", - # "extraction_id": "f6f5cfb6-8654-5e1c-b574-849a8a313452", + # "chunk_id": "f6f5cfb6-8654-5e1c-b574-849a8a313452", # "document_id": "9fbe403b-c11c-5aae-8ade-ef22980c3ad1", # "user_id": "2acb499e-8428-543b-bd85-0d9098718220", # "score": lambda x: 0.016 <= x <= 0.018, @@ -225,23 +229,24 @@ def test_rag_response_sample_file_cli(): - print("Testing: RAG query for Aristotle's birth year") - output = run_command( - "poetry run r2r rag --query='What year was Aristotle born?'" - ) + pass + # print("Testing: RAG query for Aristotle's birth year") + # output = run_command( + # "poetry run r2r rag --query='What year was Aristotle born?'" + # ) # TODO - Can we fix the test to check by loading JSON output? # response = json.loads(output) - expected_answer = "Aristotle was born in 384 BC" + # expected_answer = "Aristotle was born in 384 BC" - if expected_answer not in output: - print( - f"RAG query test failed: Expected answer '{expected_answer}' not found in '{output}'" - ) - sys.exit(1) + # if expected_answer not in output: + # print( + # f"RAG query test failed: Expected answer '{expected_answer}' not found in '{output}'" + # ) + # sys.exit(1) - print("RAG response test passed") - print("~" * 100) + # print("RAG response test passed") + # print("~" * 100) def test_rag_response_stream_sample_file_cli(): @@ -268,13 +273,13 @@ def test_rag_response_stream_sample_file_cli(): # Check if the output contains the search and completion tags if "<search>" not in output or "</search>" not in output: print( - "Streaming RAG query test failed: Search results not found in output" + f"Streaming RAG query test failed: Search results not found in output. '{output}'" ) sys.exit(1) if "<completion>" not in output or "</completion>" not in output: print( - "Streaming RAG query test failed: Completion not found in output" + f"Streaming RAG query test failed: Completion not found in output. 
'{output}'" ) sys.exit(1) @@ -287,8 +292,7 @@ def test_kg_create_graph_sample_file_cli(): print("Calling `poetry run r2r create-graph --run`") output = run_command("poetry run r2r create-graph --run") - if "queued" in output: - time.sleep(60) + time.sleep(120) response = requests.get( "http://localhost:7272/v2/entities/", @@ -463,11 +467,11 @@ def test_kg_delete_graph_with_cascading_sample_file_cli(): assert response.json()["results"]["entities"] == [] response = requests.get( - "http://localhost:7272/v2/triples", + "http://localhost:7272/v2/relationships", params={"collection_id": "122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"}, ) - assert response.json()["results"]["triples"] == [] + assert response.json()["results"]["relationships"] == [] print("KG delete graph with cascading test passed") print("~" * 100) diff --git a/py/tests/regression/expected_outputs/test_document_management.json b/py/tests/regression/expected_outputs/test_document_management.json index eee3aec01..45c1b15d6 100644 --- a/py/tests/regression/expected_outputs/test_document_management.json +++ b/py/tests/regression/expected_outputs/test_document_management.json @@ -448,7 +448,7 @@ "results": [ { "fragment_id": "67e7ab57-eaa0-57d8-9276-da273abcdabd", - "extraction_id": "286b3218-517c-50bf-b8ea-1262e8ec6b42", + "chunk_id": "286b3218-517c-50bf-b8ea-1262e8ec6b42", "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], diff --git a/py/tests/regression/expected_outputs/test_group_management.json b/py/tests/regression/expected_outputs/test_group_management.json index 079881405..b84b01657 100644 --- a/py/tests/regression/expected_outputs/test_group_management.json +++ b/py/tests/regression/expected_outputs/test_group_management.json @@ -88,7 +88,7 @@ "vector_search_results": [ { "fragment_id": "ced2d47c-524c-58d3-8cc3-0d474312fb00", - "extraction_id": "b4451d80-760d-5e0f-93bc-cc0a89b1630d", + "chunk_id": "b4451d80-760d-5e0f-93bc-cc0a89b1630d", "document_id": "55d7b67e-c717-5e89-a956-61580475199d", "user_id": "a34207e5-9b1a-5fe3-8a02-9ff56464b112", "group_ids": [], @@ -102,7 +102,7 @@ }, { "fragment_id": "2b30d8c0-d037-5ca7-9961-08e2a13a25cd", - "extraction_id": "eae05bf5-f732-53b3-80e5-a6e39d5a23d3", + "chunk_id": "eae05bf5-f732-53b3-80e5-a6e39d5a23d3", "document_id": "653c933c-867d-5588-b6cd-54d9412a8ffa", "user_id": "a34207e5-9b1a-5fe3-8a02-9ff56464b112", "group_ids": [], @@ -116,7 +116,7 @@ } } ], - "kg_search_results": [] + "graph_search_results": [] } }, "user_2_ingest": { @@ -145,7 +145,7 @@ "vector_search_results": [ { "fragment_id": "4f3e93df-099c-58a7-a5cf-c40ba5ae76c1", - "extraction_id": "838aa00a-2d5a-588c-9aa1-2553ae514024", + "chunk_id": "838aa00a-2d5a-588c-9aa1-2553ae514024", "document_id": "6a7d57a8-0bab-55df-8674-a94b1ecd6492", "user_id": "4404314d-12a1-5299-9f7a-adfac07a5a3b", "group_ids": [], @@ -158,7 +158,7 @@ } } ], - "kg_search_results": [] + "graph_search_results": [] } } }, diff --git a/py/tests/regression/expected_outputs/test_retrieval.json b/py/tests/regression/expected_outputs/test_retrieval.json index bb8385532..85b5249f4 100644 --- a/py/tests/regression/expected_outputs/test_retrieval.json +++ b/py/tests/regression/expected_outputs/test_retrieval.json @@ -4,7 +4,7 @@ "vector_search_results": [ { "fragment_id": "392ab9b4-c4bc-5894-8edf-332fcd9245bb", - "extraction_id": "cd49a88d-92e5-59f1-8331-3d3d3ecb7f3a", + "chunk_id": "cd49a88d-92e5-59f1-8331-3d3d3ecb7f3a", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": 
"2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -25,7 +25,7 @@ }, { "fragment_id": "fd8508db-c444-5ed9-afce-67340354fb1e", - "extraction_id": "7f16fa20-9bc1-5841-ba74-95cdbb27e9fb", + "chunk_id": "7f16fa20-9bc1-5841-ba74-95cdbb27e9fb", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -46,7 +46,7 @@ }, { "fragment_id": "e74d31b4-5de3-581c-abaf-8d28f48f924b", - "extraction_id": "f4aa1be1-c0fa-5edd-a536-d5af7f023b31", + "chunk_id": "f4aa1be1-c0fa-5edd-a536-d5af7f023b31", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -67,7 +67,7 @@ }, { "fragment_id": "76d010b2-1498-531a-bf89-66aa17331203", - "extraction_id": "f31920df-e1db-5a2c-9b8f-9c7b845a21c1", + "chunk_id": "f31920df-e1db-5a2c-9b8f-9c7b845a21c1", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -88,7 +88,7 @@ }, { "fragment_id": "c96ee688-6e36-5abb-b066-d87779be1cf6", - "extraction_id": "eb08b70d-2e82-5de3-90ee-98537a761ea8", + "chunk_id": "eb08b70d-2e82-5de3-90ee-98537a761ea8", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -109,7 +109,7 @@ }, { "fragment_id": "2abca1b8-f005-59dd-9716-adf883ec3aca", - "extraction_id": "edd4f1f9-f6c8-5341-a1e7-ce57cac7f2fb", + "chunk_id": "edd4f1f9-f6c8-5341-a1e7-ce57cac7f2fb", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -130,7 +130,7 @@ }, { "fragment_id": "85474903-20cc-58e6-ad3c-a1b64de77557", - "extraction_id": "b69b89e5-48e1-526e-ba04-c9f5c0c56fa6", + "chunk_id": "b69b89e5-48e1-526e-ba04-c9f5c0c56fa6", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -151,7 +151,7 @@ }, { "fragment_id": "a2f05c5a-0d43-538d-b4d0-ffd29d215437", - "extraction_id": "f1cb0bd8-0721-59ab-9e39-110efccf33dd", + "chunk_id": "f1cb0bd8-0721-59ab-9e39-110efccf33dd", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -172,7 +172,7 @@ }, { "fragment_id": "2654f646-222c-50af-bd1c-c7311e6a9dc9", - "extraction_id": "b25d210b-1b58-578a-b038-34f76d77f377", + "chunk_id": "b25d210b-1b58-578a-b038-34f76d77f377", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -193,7 +193,7 @@ }, { "fragment_id": "309f729d-78eb-569a-837c-50367c20e898", - "extraction_id": "dfc368a6-efaf-5f4d-a20b-0fd6059a5f35", + "chunk_id": "dfc368a6-efaf-5f4d-a20b-0fd6059a5f35", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -213,7 +213,7 @@ } } ], - "kg_search_results": null + "graph_search_results": null } }, "basic_rag": { @@ -252,7 +252,7 @@ "vector_search_results": [ { "fragment_id": "07aa09c5-81a8-5a48-953a-532064a446f8", - "extraction_id": "d3060c36-85dc-5e8d-b8ff-cfe4c1753ccc", + "chunk_id": "d3060c36-85dc-5e8d-b8ff-cfe4c1753ccc", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -273,7 +273,7 @@ }, { "fragment_id": "2044e305-c042-5f0d-b05d-a2b97181f7a8", - "extraction_id": "4329441a-5faf-5e9d-801f-ebd753ee1bd3", + "chunk_id": 
"4329441a-5faf-5e9d-801f-ebd753ee1bd3", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -294,7 +294,7 @@ }, { "fragment_id": "3840834b-7c74-5417-9252-9080e609fb2f", - "extraction_id": "cf934fe1-926d-5525-a230-30946961cf28", + "chunk_id": "cf934fe1-926d-5525-a230-30946961cf28", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -315,7 +315,7 @@ }, { "fragment_id": "e2a37b27-0644-59e4-9746-37d48592a299", - "extraction_id": "6b86ac2f-ce33-5126-83e6-a8731ea677c8", + "chunk_id": "6b86ac2f-ce33-5126-83e6-a8731ea677c8", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -336,7 +336,7 @@ }, { "fragment_id": "de30c3c9-cdfd-5872-bdaf-4859bef5c3a8", - "extraction_id": "33bc6d8b-9fdc-5df7-be1d-fa7de176a0b5", + "chunk_id": "33bc6d8b-9fdc-5df7-be1d-fa7de176a0b5", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -357,7 +357,7 @@ }, { "fragment_id": "4b6ee3eb-daca-5930-bafe-946cad56cdcc", - "extraction_id": "eb2bc121-0b00-5f70-8eb6-549e1fb1ed72", + "chunk_id": "eb2bc121-0b00-5f70-8eb6-549e1fb1ed72", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -378,7 +378,7 @@ }, { "fragment_id": "af5f93d3-8b2f-5e71-a358-0dd56c2f68ac", - "extraction_id": "acf12622-2e6e-5234-9768-ba448294a81d", + "chunk_id": "acf12622-2e6e-5234-9768-ba448294a81d", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -399,7 +399,7 @@ }, { "fragment_id": "d5379124-e7ff-509f-b47f-a79152eec2d4", - "extraction_id": "2562b865-e4df-5376-9e70-927be9afbb7e", + "chunk_id": "2562b865-e4df-5376-9e70-927be9afbb7e", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -420,7 +420,7 @@ }, { "fragment_id": "98a93be5-13ba-5bd6-9a18-e7ceef0fae88", - "extraction_id": "8ab931e3-8f47-5598-90b8-928f387ec256", + "chunk_id": "8ab931e3-8f47-5598-90b8-928f387ec256", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -441,7 +441,7 @@ }, { "fragment_id": "d87084d1-c52a-5a4b-96ad-9fc1cb98bfc5", - "extraction_id": "8361bf60-bce2-56c2-b982-376a75e47d58", + "chunk_id": "8361bf60-bce2-56c2-b982-376a75e47d58", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -461,7 +461,7 @@ } } ], - "kg_search_results": null + "graph_search_results": null } } }, @@ -501,7 +501,7 @@ "vector_search_results": [ { "fragment_id": "7cbdab86-1689-5779-81bd-62f7eb3ab36d", - "extraction_id": "866f85a0-b3d6-5fc5-9ca0-dbd2373eac58", + "chunk_id": "866f85a0-b3d6-5fc5-9ca0-dbd2373eac58", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -521,7 +521,7 @@ }, { "fragment_id": "8e563fb1-4665-53a8-8a83-63a1f88e2aea", - "extraction_id": "f6bc23b5-bc80-5e49-9b55-25e9abe97073", + "chunk_id": "f6bc23b5-bc80-5e49-9b55-25e9abe97073", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -541,7 +541,7 @@ }, { "fragment_id": 
"eaf48cfe-592e-55fa-9f07-613a4f221c45", - "extraction_id": "fdf6127b-e623-58bc-a50b-b7e7b040c03a", + "chunk_id": "fdf6127b-e623-58bc-a50b-b7e7b040c03a", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -561,7 +561,7 @@ }, { "fragment_id": "3cb1c2db-01e4-5ea8-a39f-31f5949637f8", - "extraction_id": "02b64e7c-5aa5-5380-8fa0-3d8b64866aa8", + "chunk_id": "02b64e7c-5aa5-5380-8fa0-3d8b64866aa8", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -581,7 +581,7 @@ }, { "fragment_id": "ac15f806-8723-5fe7-832d-ed0427bd3550", - "extraction_id": "416b07ed-cdd6-51fd-8f54-4164c0160860", + "chunk_id": "416b07ed-cdd6-51fd-8f54-4164c0160860", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -602,7 +602,7 @@ }, { "fragment_id": "5d09c80f-ba83-5204-a6b3-f08831e150b0", - "extraction_id": "f86a905c-8d82-52ff-ad72-a800ca3af6f4", + "chunk_id": "f86a905c-8d82-52ff-ad72-a800ca3af6f4", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -622,7 +622,7 @@ }, { "fragment_id": "b47050a2-9906-5922-b6d4-52e4dedb499f", - "extraction_id": "d436c7f7-d7c7-509e-a383-94a94360e601", + "chunk_id": "d436c7f7-d7c7-509e-a383-94a94360e601", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -643,7 +643,7 @@ }, { "fragment_id": "d143b410-f9a4-5f3b-bb46-fb412eda8201", - "extraction_id": "37f940e2-18f4-50f5-93aa-cec422fc9211", + "chunk_id": "37f940e2-18f4-50f5-93aa-cec422fc9211", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -663,7 +663,7 @@ }, { "fragment_id": "09f57c70-e7c6-548b-897f-fb8e9aba31c8", - "extraction_id": "a021aa95-14d9-5301-9252-b06bcb852956", + "chunk_id": "a021aa95-14d9-5301-9252-b06bcb852956", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -684,7 +684,7 @@ }, { "fragment_id": "dbd5427c-f5ef-5fa6-83ae-a4a8ddbb48c2", - "extraction_id": "14b08757-0819-5105-af37-509686dd6d01", + "chunk_id": "14b08757-0819-5105-af37-509686dd6d01", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [], @@ -705,7 +705,7 @@ } } ], - "kg_search_results": null + "graph_search_results": null } } }, @@ -715,7 +715,7 @@ "choices": [ { "message": { - "content": "[{\"fragment_id\": \"94684f2d-fe60-5ba3-b1e8-0a921841bac9\", \"extraction_id\": \"fde39a49-00fc-5622-addd-13eb9c3bad4b\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6675721804840684, \"text\": \"Total Stockholders\\u2019 Equity (Deficit) 1,676,163\\n\\n5,184\\n\\n\\u2014\\n\\n(26,298)\\n\\n28,637\\n\\n(1) 721,710 (2,038) (1,009,359) 1,393,998\\n\\nLyft, Inc. 
Consolidated Statements of Cash Flows (in thousands)\\n\\n2021\\n\\nCash flows from operating activities Net loss Adjustments to reconcile net loss to net cash used in operating activities\\n\\n$\\n\\n(1,009,359)\\n\\nDepreciation and amortization Stock-based compensation Amortization of premium on marketable securities Accretion of discount on marketable securities Amortization of debt discount and issuance costs Deferred income tax from convertible senior notes Loss on sale and disposal of assets, net Gain on divestiture Other Changes in operating assets and liabilities, net effects of acquisition\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 572, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 82, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"0d5c5803-8846-59d4-8ae3-3696b718f162\", \"extraction_id\": \"fa3d2549-593a-5a80-88a2-b2d031d79771\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6647443571534382, \"text\": \"79\\n\\n2019 3,615,960\\n\\n2,176,469 636,116 1,505,640 814,122 1,186,093 6,318,440 (2,702,480) \\u2014 102,595 (2,599,885) 2,356 (2,602,241)\\n\\n(11.44)\\n\\n227,498\\n\\n81,321 75,212 971,941 72,046 398,791\\n\\nLyft, Inc. Consolidated Statements of Comprehensive Loss (in thousands)\\n\\nNet loss Other comprehensive income (loss)\\n\\n$\\n\\nYear Ended December 31, 2020 (1,752,857) $\\n\\n2021 (1,009,359) $\\n\\nForeign currency translation adjustment Unrealized gain (loss) on marketable securities, net of taxes\\n\\nOther comprehensive income (loss)\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 567, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 79, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"5d663f0a-c9a9-580f-818b-5ca0a1ca73f2\", \"extraction_id\": \"12916d2c-0691-528c-86aa-6784c1f35c55\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6628832616140414, \"text\": \"Overview\\n\\nLyft, Inc (the \\u201cCompany\\u201d or \\u201cLyft\\u201d) started a movement to revolutionize transportation. In 2012, we launched our peer-to-peer marketplace for on-demand ridesharing and have continued to pioneer innovations aligned with our mission. Today, Lyft is one of the largest multimodal transportation networks in the United States and Canada.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 16, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"7133acca-f147-5f43-b2e0-71228282fda0\", \"extraction_id\": \"212d8290-564d-5039-93cc-00cea31a1771\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6603014862717096, \"text\": \"Revenues from Contracts with Customers (ASC 606)\\n\\nWe generate substantially all our revenue from our ridesharing marketplace that connects drivers and riders. 
We recognize revenue from fees paid by drivers for use of our Lyft Platform offerings in accordance with ASC 606 as described in Note 2 of the notes to our consolidated financial statements. Drivers enter into terms of service (\\u201cToS\\u201d) with us in order to use our Lyft Driver App.\\n\\n58\\n\\n2019 to 2020 % Change\\n\\n19.0% (1.8)% (6.7)% 2.3%\\n\\nWe provide a service to drivers to complete a successful transportation service for riders. This service includes on-demand lead generation that assists drivers to find, receive and fulfill on-demand requests from riders seeking transportation services and related collection activities using our Lyft Platform. As a result, our single performance obligation in the transaction is to connect drivers with riders to facilitate the completion of a successful transportation service for riders.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 459, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 58, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"1a76991f-fa85-59b2-b522-700d47b2d809\", \"extraction_id\": \"0773cd62-b39f-517f-b6f1-be788b38374d\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6477107388714619, \"text\": \"Corporate Information\\n\\nWe were incorporated in 2007 as Bounder Web, Inc., a Delaware corporation. In 2008, we changed our name to Zimride, Inc. We founded Lyft in 2012 and\\n\\nchanged our name to Lyft, Inc. in 2013 when we sold the assets related to our Zimride operations.\\n\\n13\\n\\nAvailable Information\\n\\nOur website is located at www.lyft.com, and our investor relations website is located at investor.lyft.com. Copies of our Annual Report on Form 10-K, Quarterly Reports on Form 10-Q, Current Reports on Form 8-K and amendments to these reports filed or furnished pursuant to Section 13(a) or 15(d) of the Exchange Act, as amended, are available free of charge on our investor relations website as soon as reasonably practicable after we file such material electronically with or furnish it to the Securities and Exchange Commission (the \\u201cSEC\\u201d). The SEC also maintains a website that contains our SEC filings at www.sec.gov.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 82, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"17673edc-6fb7-577d-9bca-457c5745382d\", \"extraction_id\": \"bde94416-baaa-573a-9bc7-86ddf28535b1\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6406270265579269, \"text\": \"We generate substantially all of our revenue from our ridesharing marketplace that connects drivers and riders. We collect service fees and commissions from drivers for their use of our ridesharing marketplace. As drivers accept more rider leads and complete more rides, we earn more revenue. 
We also generate revenue from riders renting Light Vehicles, drivers renting vehicles through Express Drive, Lyft Rentals renters, Lyft Driver Center and Lyft Auto Care users, and by making our ridesharing marketplace available to organizations through our Lyft Business offerings, such as our Concierge and Corporate Business Travel programs. In the second quarter of 2021, we began generating revenues from licensing and data access agreements, primarily with third-party autonomous vehicle companies.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 20, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"8f6299b7-4582-5bac-8c74-7ca57714aefa\", \"extraction_id\": \"310e9e1f-25d3-5287-a905-5446f661d6da\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6394975757149678, \"text\": \"Revenue Recognition\\n\\nThe Company generates its revenue from its multimodal transportation networks that offer access to a variety of transportation options through the Lyft Platform and mobile-based applications. Substantially all of the Company\\u2019s revenue is generated from its ridesharing marketplace that connects drivers and riders and is recognized in accordance with Accounting Standards Codification Topic 606 (\\u201cASC 606\\u201d). In addition, the Company generates revenue in accordance with ASC 606 from licensing and data access, primarily with third-party autonomous vehicle companies. The Company also generates rental revenue from Flexdrive, its network of Light Vehicles and Lyft Rentals, which is recognized in accordance with Accounting Standards Codification Topic 842 (\\u201cASC 842\\u201d).\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 86, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"ff837ea0-0062-59ca-bb4f-aa7a1c9cecd0\", \"extraction_id\": \"41e4db8a-0478-5015-8263-cde0618ec626\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6308713775353904, \"text\": \"Light Vehicle Rider and Lyft Rentals Renter Incentives\\n\\nIncentives offered to Light Vehicle riders and Lyft Rentals renters were not material for the years ended December 31, 2021 and 2020.\\n\\nFor the years ended December 31, 2021, 2020 and 2019, in relation to the driver, rider, Light Vehicle riders and Lyft Rentals renters incentive programs, the Company recorded $1.3 billion, $390.8 million and $560.3 million as a reduction to revenue and $64.7 million, $135.0 million and $381.5 million as sales and marketing expense, respectively.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 611, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 89, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"c24d144b-c22d-5c08-876f-a03e43620aa4\", \"extraction_id\": \"2a4caab0-6193-5263-8eab-c7763e8f38e8\", \"document_id\": 
\"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6295160430381335, \"text\": \"Software Development Costs\\n\\nThe Company incurs costs related to developing the Lyft Platform and related support systems. The Company capitalizes development costs related to the Lyft Platform and related support systems once the preliminary project stage is complete and it is probable that the project will be completed and the software will be used to perform the function intended. The Company capitalized $16.2 million and $12.8 million of software development costs during the year ended December 31, 2021 and 2020, respectively. For the year ended December 31, 2019, capitalized software development costs was not material.\\n\\nInsurance Reserves\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 649, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 94, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"83985ff5-653f-53eb-b137-f616b4292f51\", \"extraction_id\": \"0919e3d5-03b0-5d54-b5f3-7f6ad4534412\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6223346365889701, \"text\": \"32.1\\u2020\\n\\nCertifications of Principal Executive Officer and Principal Financial Officer pursuant to 18 U.S.C. Section 1350, as adopted pursuant to Section 906 of the Sarbanes-Oxley Act of 2002.\\n\\n101\\n\\nThe following financial information from Lyft, Inc.\\u2019s Annual Report on Form 10-K for the fiscal year ended December 31, 2021 formatted in Inline XBRL (eXtensible Business Reporting Language): (i) Consolidated Statements of Operations for the fiscal years ended December 31, 2021, 2020 and 2019; (ii) Consolidated Statements of Comprehensive Income (Loss) for the fiscal years ended December 31, 2021, 2020, and 2019; (iii) Consolidated Balance Sheets as of December 31, 2021 and 2020; (iv) Consolidated Statements of Cash Flows for the fiscal years ended December 31, 2021, 2020, and 2019; (v) Consolidated Statements of Redeemable Convertible Preferred Stock and Stockholders\\u2019 Equity for the fiscal years ended December 31, 2021, 2020, and 2019; and (vi) Notes to the Consolidated Financial Statements.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 817, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 127, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}]Lyft's profit in 2020 was a net loss of $1,752,857,000 [2]." + "content": "[{\"fragment_id\": \"94684f2d-fe60-5ba3-b1e8-0a921841bac9\", \"chunk_id\": \"fde39a49-00fc-5622-addd-13eb9c3bad4b\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6675721804840684, \"text\": \"Total Stockholders\\u2019 Equity (Deficit) 1,676,163\\n\\n5,184\\n\\n\\u2014\\n\\n(26,298)\\n\\n28,637\\n\\n(1) 721,710 (2,038) (1,009,359) 1,393,998\\n\\nLyft, Inc. 
Consolidated Statements of Cash Flows (in thousands)\\n\\n2021\\n\\nCash flows from operating activities Net loss Adjustments to reconcile net loss to net cash used in operating activities\\n\\n$\\n\\n(1,009,359)\\n\\nDepreciation and amortization Stock-based compensation Amortization of premium on marketable securities Accretion of discount on marketable securities Amortization of debt discount and issuance costs Deferred income tax from convertible senior notes Loss on sale and disposal of assets, net Gain on divestiture Other Changes in operating assets and liabilities, net effects of acquisition\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 572, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 82, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"0d5c5803-8846-59d4-8ae3-3696b718f162\", \"chunk_id\": \"fa3d2549-593a-5a80-88a2-b2d031d79771\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6647443571534382, \"text\": \"79\\n\\n2019 3,615,960\\n\\n2,176,469 636,116 1,505,640 814,122 1,186,093 6,318,440 (2,702,480) \\u2014 102,595 (2,599,885) 2,356 (2,602,241)\\n\\n(11.44)\\n\\n227,498\\n\\n81,321 75,212 971,941 72,046 398,791\\n\\nLyft, Inc. Consolidated Statements of Comprehensive Loss (in thousands)\\n\\nNet loss Other comprehensive income (loss)\\n\\n$\\n\\nYear Ended December 31, 2020 (1,752,857) $\\n\\n2021 (1,009,359) $\\n\\nForeign currency translation adjustment Unrealized gain (loss) on marketable securities, net of taxes\\n\\nOther comprehensive income (loss)\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 567, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 79, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"5d663f0a-c9a9-580f-818b-5ca0a1ca73f2\", \"chunk_id\": \"12916d2c-0691-528c-86aa-6784c1f35c55\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6628832616140414, \"text\": \"Overview\\n\\nLyft, Inc (the \\u201cCompany\\u201d or \\u201cLyft\\u201d) started a movement to revolutionize transportation. In 2012, we launched our peer-to-peer marketplace for on-demand ridesharing and have continued to pioneer innovations aligned with our mission. Today, Lyft is one of the largest multimodal transportation networks in the United States and Canada.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 16, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"7133acca-f147-5f43-b2e0-71228282fda0\", \"chunk_id\": \"212d8290-564d-5039-93cc-00cea31a1771\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6603014862717096, \"text\": \"Revenues from Contracts with Customers (ASC 606)\\n\\nWe generate substantially all our revenue from our ridesharing marketplace that connects drivers and riders. 
We recognize revenue from fees paid by drivers for use of our Lyft Platform offerings in accordance with ASC 606 as described in Note 2 of the notes to our consolidated financial statements. Drivers enter into terms of service (\\u201cToS\\u201d) with us in order to use our Lyft Driver App.\\n\\n58\\n\\n2019 to 2020 % Change\\n\\n19.0% (1.8)% (6.7)% 2.3%\\n\\nWe provide a service to drivers to complete a successful transportation service for riders. This service includes on-demand lead generation that assists drivers to find, receive and fulfill on-demand requests from riders seeking transportation services and related collection activities using our Lyft Platform. As a result, our single performance obligation in the transaction is to connect drivers with riders to facilitate the completion of a successful transportation service for riders.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 459, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 58, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"1a76991f-fa85-59b2-b522-700d47b2d809\", \"chunk_id\": \"0773cd62-b39f-517f-b6f1-be788b38374d\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6477107388714619, \"text\": \"Corporate Information\\n\\nWe were incorporated in 2007 as Bounder Web, Inc., a Delaware corporation. In 2008, we changed our name to Zimride, Inc. We founded Lyft in 2012 and\\n\\nchanged our name to Lyft, Inc. in 2013 when we sold the assets related to our Zimride operations.\\n\\n13\\n\\nAvailable Information\\n\\nOur website is located at www.lyft.com, and our investor relations website is located at investor.lyft.com. Copies of our Annual Report on Form 10-K, Quarterly Reports on Form 10-Q, Current Reports on Form 8-K and amendments to these reports filed or furnished pursuant to Section 13(a) or 15(d) of the Exchange Act, as amended, are available free of charge on our investor relations website as soon as reasonably practicable after we file such material electronically with or furnish it to the Securities and Exchange Commission (the \\u201cSEC\\u201d). The SEC also maintains a website that contains our SEC filings at www.sec.gov.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 82, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"17673edc-6fb7-577d-9bca-457c5745382d\", \"chunk_id\": \"bde94416-baaa-573a-9bc7-86ddf28535b1\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6406270265579269, \"text\": \"We generate substantially all of our revenue from our ridesharing marketplace that connects drivers and riders. We collect service fees and commissions from drivers for their use of our ridesharing marketplace. As drivers accept more rider leads and complete more rides, we earn more revenue. 
We also generate revenue from riders renting Light Vehicles, drivers renting vehicles through Express Drive, Lyft Rentals renters, Lyft Driver Center and Lyft Auto Care users, and by making our ridesharing marketplace available to organizations through our Lyft Business offerings, such as our Concierge and Corporate Business Travel programs. In the second quarter of 2021, we began generating revenues from licensing and data access agreements, primarily with third-party autonomous vehicle companies.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 20, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"8f6299b7-4582-5bac-8c74-7ca57714aefa\", \"chunk_id\": \"310e9e1f-25d3-5287-a905-5446f661d6da\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6394975757149678, \"text\": \"Revenue Recognition\\n\\nThe Company generates its revenue from its multimodal transportation networks that offer access to a variety of transportation options through the Lyft Platform and mobile-based applications. Substantially all of the Company\\u2019s revenue is generated from its ridesharing marketplace that connects drivers and riders and is recognized in accordance with Accounting Standards Codification Topic 606 (\\u201cASC 606\\u201d). In addition, the Company generates revenue in accordance with ASC 606 from licensing and data access, primarily with third-party autonomous vehicle companies. The Company also generates rental revenue from Flexdrive, its network of Light Vehicles and Lyft Rentals, which is recognized in accordance with Accounting Standards Codification Topic 842 (\\u201cASC 842\\u201d).\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 86, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"ff837ea0-0062-59ca-bb4f-aa7a1c9cecd0\", \"chunk_id\": \"41e4db8a-0478-5015-8263-cde0618ec626\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6308713775353904, \"text\": \"Light Vehicle Rider and Lyft Rentals Renter Incentives\\n\\nIncentives offered to Light Vehicle riders and Lyft Rentals renters were not material for the years ended December 31, 2021 and 2020.\\n\\nFor the years ended December 31, 2021, 2020 and 2019, in relation to the driver, rider, Light Vehicle riders and Lyft Rentals renters incentive programs, the Company recorded $1.3 billion, $390.8 million and $560.3 million as a reduction to revenue and $64.7 million, $135.0 million and $381.5 million as sales and marketing expense, respectively.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 611, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 89, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"c24d144b-c22d-5c08-876f-a03e43620aa4\", \"chunk_id\": \"2a4caab0-6193-5263-8eab-c7763e8f38e8\", \"document_id\": 
\"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6295160430381335, \"text\": \"Software Development Costs\\n\\nThe Company incurs costs related to developing the Lyft Platform and related support systems. The Company capitalizes development costs related to the Lyft Platform and related support systems once the preliminary project stage is complete and it is probable that the project will be completed and the software will be used to perform the function intended. The Company capitalized $16.2 million and $12.8 million of software development costs during the year ended December 31, 2021 and 2020, respectively. For the year ended December 31, 2019, capitalized software development costs was not material.\\n\\nInsurance Reserves\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 649, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 94, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"83985ff5-653f-53eb-b137-f616b4292f51\", \"chunk_id\": \"0919e3d5-03b0-5d54-b5f3-7f6ad4534412\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6223346365889701, \"text\": \"32.1\\u2020\\n\\nCertifications of Principal Executive Officer and Principal Financial Officer pursuant to 18 U.S.C. Section 1350, as adopted pursuant to Section 906 of the Sarbanes-Oxley Act of 2002.\\n\\n101\\n\\nThe following financial information from Lyft, Inc.\\u2019s Annual Report on Form 10-K for the fiscal year ended December 31, 2021 formatted in Inline XBRL (eXtensible Business Reporting Language): (i) Consolidated Statements of Operations for the fiscal years ended December 31, 2021, 2020 and 2019; (ii) Consolidated Statements of Comprehensive Income (Loss) for the fiscal years ended December 31, 2021, 2020, and 2019; (iii) Consolidated Balance Sheets as of December 31, 2021 and 2020; (iv) Consolidated Statements of Cash Flows for the fiscal years ended December 31, 2021, 2020, and 2019; (v) Consolidated Statements of Redeemable Convertible Preferred Stock and Stockholders\\u2019 Equity for the fiscal years ended December 31, 2021, 2020, and 2019; and (vi) Notes to the Consolidated Financial Statements.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 817, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 127, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}]Lyft's profit in 2020 was a net loss of $1,752,857,000 [2]." 
} } ] diff --git a/py/tests/regression/observed_outputs/test_document_management.json b/py/tests/regression/observed_outputs/test_document_management.json index eb99b338b..adee70843 100644 --- a/py/tests/regression/observed_outputs/test_document_management.json +++ b/py/tests/regression/observed_outputs/test_document_management.json @@ -479,7 +479,7 @@ "document_chunks_test": { "results": [ { - "extraction_id": "bcd08cd0-1551-5ee2-ad08-551ae15e5ed1", + "chunk_id": "bcd08cd0-1551-5ee2-ad08-551ae15e5ed1", "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ diff --git a/py/tests/regression/observed_outputs/test_observability.json b/py/tests/regression/observed_outputs/test_observability.json index 692ca2a0e..195861ff8 100644 --- a/py/tests/regression/observed_outputs/test_observability.json +++ b/py/tests/regression/observed_outputs/test_observability.json @@ -68,12 +68,12 @@ }, { "key": "completion_record", - "value": "{\"message_id\": \"10400ac0-cfdc-5bf4-a3db-a18a1fa0cca5\", \"message_type\": \"assistant\", \"timestamp\": \"2024-10-03T22:44:59.813045\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-10-03T22:54:21.484798\", \"completion_end_time\": \"2024-10-03T22:54:22.505957\", \"search_query\": \"What was Uber's profit in 2020?\", \"search_results\": {\"vector_search_results\": [{\"extraction_id\": \"328e5142-bd6c-5553-b5a0-8fdbd72ee6c6\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.7446624344989735, \"text\": \"Revenue was $17.5 billion, or up 57% year-over-year, reflecting the overall growth in our Delivery business and an increase in Freight revenue attributable to the acquisition of Transplace in the fourth quarter of 2021 as well as growth in the number of shippers and carriers on the network combined with an increase in volumes with our top shippers.\\n\\nNet loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of our ATG Business to Aurora, a $1.6 billion pre-tax net benefit relating to Uber\\u2019s equity investments, as well as reductions in our fixed cost structure and increased variable cost efficiencies. Net loss attributable to Uber Technologies, Inc. 
also included $1.2 billion of stock-based compensation expense.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 445, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 53, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"extraction_id\": \"a0b5c2f6-7dcd-5865-b2c6-0b3cd2189e57\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.7071749146476451, \"text\": \"Total costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n100 %\\n\\n46 % 16 % 32 % 20 % 24 % 5 % 144 % (44)% (4)% (15)% (62)% (2)% \\u2014 % (61)% \\u2014 % (61)%\\n\\n100 %\\n\\n54 % 11 % 27 % 12 % 13 % 5 % 122 % (22)% (3)% 19 % (6)% (3)% \\u2014 % (3)% \\u2014 % (3)%\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n(1)\\n\\nTotals of percentage of revenues may not foot due to rounding.\\n\\nComparison of the Years Ended December 31, 2020 and 2021\\n\\nRevenue\\n\\nYear Ended December 31,\\n\\n(In millions, except percentages)\\n\\n2020\\n\\n2021\\n\\n2020 to 2021 % Change\\n\\nRevenue\\n\\n$\\n\\n11,139 $\\n\\n17,455\\n\\n57 %\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 463, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 57, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"extraction_id\": \"500bf649-b2a8-521b-bdb2-78cdc342531f\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6810148751433258, \"text\": \"Year Ended December 31, 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\\n\\nTotal costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n13,000 $\\n\\n6,061 2,302 4,626 4,836 3,299 472 21,596 (8,596) (559) 722 (8,433) 45 (34) (8,512) (6) (8,506) $\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\nNet loss attributable to Uber Technologies, Inc. Net loss per share attributable to Uber Technologies, Inc. 
common stockholders:\\n\\n$\\n\\nBasic\\n\\n$\\n\\n(6.81) $\\n\\n(3.86) $\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 574, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 77, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"extraction_id\": \"90b1f17b-a97f-5552-9951-fbc6df634039\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6806196963602422, \"text\": \"Less: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\n17,455\\n\\n9,351 1,877 4,789 2,054 2,316 902 21,289 (3,834) (483) 3,292 (1,025) (492) (37) (570) (74) (496)\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n$\\n\\n54\\n\\nThe following table sets forth the components of our consolidated statements of operations for each of the periods presented as a percentage of revenue\\n\\n(1)\\n\\n:\\n\\nYear Ended December 31, 2021 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 462, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 56, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"extraction_id\": \"845a2b04-70ee-5a70-91fa-44016677fd92\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6537216512130718, \"text\": \"The Uber Service activities are performed to satisfy our sole performance obligation in the transaction, which is to connect Drivers and Merchants with end-\\n\\nusers to facilitate the completion of a successful transaction.\\n\\nIn 2020, we began charging Mobility end-users a fee to use the platform in certain markets. In these transactions, in addition to a performance obligation to Drivers, we also have a performance obligation to end-users, which is to connect end-users to Drivers in the marketplace. We recognize revenue when a trip is complete. We present revenue on a net basis for these transactions, as we do not control the service provided by Drivers to end-users. 
For the years ended December 31, 2020 and 2021, we recognized total revenue of $323 million and $336 million, respectively, associated with these fees charged to end-users.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 642, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 90, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"extraction_id\": \"1739d713-3fb6-534f-8ddb-7ff9cd6484c7\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.638846836158823, \"text\": \"Other income (expense), net\\n\\n$\\n\\nDuring the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operations to Zomato recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (\\u201cSennder\\u201d) recognized in the fourth quarter of 2020, partially offset by a $27 million loss on the sale of our JUMP operations to Lime recognized in the second quarter of 2020.\\n\\n(1)\\n\\nDuring the year ended December 31, 2021, gain on business divestitures, net represented a $1.6 billion gain on the sale of our ATG Business to Aurora\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 799, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 118, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"extraction_id\": \"70e9089c-56e0-52f7-80ea-ad66fe1f9a79\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6322252592771936, \"text\": \"2019\\n\\n100.0 %\\n\\n60.2 17.6 41.6 22.5 32.8 174.7 (74.7) \\u2014 2.8 (71.9) 0.1 (72.0)%\\n\\n2019 to 2020 % Change\\n\\n(35) %\\n\\nsecond quarter of 2021. These increases were offset by investments in driver supply by increasing driver incentives recorded as a reduction to revenue by $942.9 million in 2021 as compared to the prior year as rider demand outpaced driver supply during certain periods of the pandemic recovery in 2021. Revenue in 2020 was also higher in the first quarter of 2020 prior to the implementation of shelter-in-place orders and other travel restrictions across North America beginning March 2020.\\n\\nWe expect to see continued recovery in demand for our platform and the resulting positive impacts on revenue as there are more widespread immunity levels, more communities reopen and other restrictive travel and social distancing measures in response to COVID-19 are eased. 
However, we cannot predict the impact of COVID variants and the longer term impact of the pandemic on consumer behavior.\\n\\nCost of Revenue\\n\\n2021\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 493, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 63, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"extraction_id\": \"5425859b-cbfa-54e4-9729-5f92c6f61efc\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6301008528290666, \"text\": \"For additional discussion, see the risk factor titled \\u201c\\u2014If we are unable to attract or maintain a critical mass of Drivers, consumers, merchants, shippers, and carriers, whether as a result of competition or other factors, our platform will become less appealing to platform users, and our financial results would be adversely impacted.\\u201d included in Part I, Item 1A of this Annual Report on Form 10-K as well our 2021 ESG Report and our 2021 People and Culture Report. The information in these reports is not a part of this Form 10-K.\\n\\nAdditional Information\\n\\nWe were founded in 2009 and incorporated as Ubercab, Inc., a Delaware corporation, in July 2010. In February 2011, we changed our name to Uber\\n\\nTechnologies, Inc. Our principal executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.\\n\\n10\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 12, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"extraction_id\": \"9dae5d7c-4bcd-52f0-bdfc-a9e327c56069\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6285498210400674, \"text\": \"Uber Technologies, Inc. (\\u201cUber,\\u201d \\u201cwe,\\u201d \\u201cour,\\u201d or \\u201cus\\u201d) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. Uber is a technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B. Uber develops and operates proprietary technology applications supporting a variety of offerings on its platform (\\u201cplatform(s)\\u201d or \\u201cPlatform(s)\\u201d). Uber connects consumers (\\u201cRider(s)\\u201d) with independent providers of ride services (\\u201cMobility Driver(s)\\u201d) for ridesharing services, and connects Riders and other consumers (\\u201cEaters\\u201d) with restaurants, grocers and other stores (collectively, \\u201cMerchants\\u201d) with delivery service providers (\\u201cCouriers\\u201d) for meal preparation, grocery and other delivery services. 
Riders and Eaters are collectively referred to as \\u201cend-user(s)\\u201d or \\u201cconsumer(s).\\u201d Mobility Drivers and Couriers are collectively referred to as \\u201cDriver(s).\\u201d Uber also connects consumers with public\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 592, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 84, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"extraction_id\": \"9bba73a7-4ebf-51f2-8a55-553a93d2ac41\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.628432135926722, \"text\": \"Year Ended December 31, 2020\\n\\n1,000 49 1,189 (27) \\u2014 (138) (34) 8,939 (4) 3,824\\n\\n247 125 2,628 (527) (891) (224) 38 1,379 (92) (4,327)\\n\\n8,209 34 12,067 $\\n\\n12,067 (349) 7,391 $\\n\\n332 $ 133\\n\\n412 $ 82\\n\\n14,224 4,229 \\u2014 251 9 \\u2014 \\u2014\\n\\n\\u2014 \\u2014 \\u2014 196 3,898 171 1,634\\n\\n2021\\n\\n675 107 1,484 (27) (307) (226) 101 1,780 (69) 65\\n\\n7,391 349 7,805\\n\\n449 87\\n\\n\\u2014 \\u2014 232 184 1,868 1,018 \\u2014\\n\\nUBER TECHNOLOGIES, INC.\\n\\nNOTES TO CONSOLIDATED FINANCIAL STATEMENTS\\n\\nNote 1 \\u2013 Description of Business and Summary of Significant Accounting Policies\\n\\nDescription of Business\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 83, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-AEP09aw4hSfgVVU9Rl7pJxBUUidjO\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"Uber's profit in 2020 was not a profit but a net loss. The net loss attributable to Uber Technologies, Inc. 
for the year ended December 31, 2020, was $6,768 million [3].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1727996061, \"model\": \"gpt-4o-2024-08-06\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_e5e4913e83\", \"usage\": {\"completion_tokens\": 47, \"prompt_tokens\": 2320, \"total_tokens\": 2367, \"completion_tokens_details\": {\"audio_tokens\": null, \"reasoning_tokens\": 0}, \"prompt_tokens_details\": {\"audio_tokens\": null, \"cached_tokens\": 2176}}}}", + "value": "{\"message_id\": \"10400ac0-cfdc-5bf4-a3db-a18a1fa0cca5\", \"message_type\": \"assistant\", \"timestamp\": \"2024-10-03T22:44:59.813045\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-10-03T22:54:21.484798\", \"completion_end_time\": \"2024-10-03T22:54:22.505957\", \"search_query\": \"What was Uber's profit in 2020?\", \"search_results\": {\"vector_search_results\": [{\"chunk_id\": \"328e5142-bd6c-5553-b5a0-8fdbd72ee6c6\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.7446624344989735, \"text\": \"Revenue was $17.5 billion, or up 57% year-over-year, reflecting the overall growth in our Delivery business and an increase in Freight revenue attributable to the acquisition of Transplace in the fourth quarter of 2021 as well as growth in the number of shippers and carriers on the network combined with an increase in volumes with our top shippers.\\n\\nNet loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of our ATG Business to Aurora, a $1.6 billion pre-tax net benefit relating to Uber\\u2019s equity investments, as well as reductions in our fixed cost structure and increased variable cost efficiencies. Net loss attributable to Uber Technologies, Inc. 
also included $1.2 billion of stock-based compensation expense.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 445, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 53, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"chunk_id\": \"a0b5c2f6-7dcd-5865-b2c6-0b3cd2189e57\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.7071749146476451, \"text\": \"Total costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n100 %\\n\\n46 % 16 % 32 % 20 % 24 % 5 % 144 % (44)% (4)% (15)% (62)% (2)% \\u2014 % (61)% \\u2014 % (61)%\\n\\n100 %\\n\\n54 % 11 % 27 % 12 % 13 % 5 % 122 % (22)% (3)% 19 % (6)% (3)% \\u2014 % (3)% \\u2014 % (3)%\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n(1)\\n\\nTotals of percentage of revenues may not foot due to rounding.\\n\\nComparison of the Years Ended December 31, 2020 and 2021\\n\\nRevenue\\n\\nYear Ended December 31,\\n\\n(In millions, except percentages)\\n\\n2020\\n\\n2021\\n\\n2020 to 2021 % Change\\n\\nRevenue\\n\\n$\\n\\n11,139 $\\n\\n17,455\\n\\n57 %\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 463, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 57, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"chunk_id\": \"500bf649-b2a8-521b-bdb2-78cdc342531f\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6810148751433258, \"text\": \"Year Ended December 31, 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\\n\\nTotal costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n13,000 $\\n\\n6,061 2,302 4,626 4,836 3,299 472 21,596 (8,596) (559) 722 (8,433) 45 (34) (8,512) (6) (8,506) $\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\nNet loss attributable to Uber Technologies, Inc. Net loss per share attributable to Uber Technologies, Inc. 
common stockholders:\\n\\n$\\n\\nBasic\\n\\n$\\n\\n(6.81) $\\n\\n(3.86) $\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 574, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 77, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"chunk_id\": \"90b1f17b-a97f-5552-9951-fbc6df634039\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6806196963602422, \"text\": \"Less: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\n17,455\\n\\n9,351 1,877 4,789 2,054 2,316 902 21,289 (3,834) (483) 3,292 (1,025) (492) (37) (570) (74) (496)\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n$\\n\\n54\\n\\nThe following table sets forth the components of our consolidated statements of operations for each of the periods presented as a percentage of revenue\\n\\n(1)\\n\\n:\\n\\nYear Ended December 31, 2021 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 462, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 56, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"chunk_id\": \"845a2b04-70ee-5a70-91fa-44016677fd92\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6537216512130718, \"text\": \"The Uber Service activities are performed to satisfy our sole performance obligation in the transaction, which is to connect Drivers and Merchants with end-\\n\\nusers to facilitate the completion of a successful transaction.\\n\\nIn 2020, we began charging Mobility end-users a fee to use the platform in certain markets. In these transactions, in addition to a performance obligation to Drivers, we also have a performance obligation to end-users, which is to connect end-users to Drivers in the marketplace. We recognize revenue when a trip is complete. We present revenue on a net basis for these transactions, as we do not control the service provided by Drivers to end-users. 
For the years ended December 31, 2020 and 2021, we recognized total revenue of $323 million and $336 million, respectively, associated with these fees charged to end-users.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 642, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 90, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"chunk_id\": \"1739d713-3fb6-534f-8ddb-7ff9cd6484c7\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.638846836158823, \"text\": \"Other income (expense), net\\n\\n$\\n\\nDuring the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operations to Zomato recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (\\u201cSennder\\u201d) recognized in the fourth quarter of 2020, partially offset by a $27 million loss on the sale of our JUMP operations to Lime recognized in the second quarter of 2020.\\n\\n(1)\\n\\nDuring the year ended December 31, 2021, gain on business divestitures, net represented a $1.6 billion gain on the sale of our ATG Business to Aurora\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 799, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 118, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"chunk_id\": \"70e9089c-56e0-52f7-80ea-ad66fe1f9a79\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6322252592771936, \"text\": \"2019\\n\\n100.0 %\\n\\n60.2 17.6 41.6 22.5 32.8 174.7 (74.7) \\u2014 2.8 (71.9) 0.1 (72.0)%\\n\\n2019 to 2020 % Change\\n\\n(35) %\\n\\nsecond quarter of 2021. These increases were offset by investments in driver supply by increasing driver incentives recorded as a reduction to revenue by $942.9 million in 2021 as compared to the prior year as rider demand outpaced driver supply during certain periods of the pandemic recovery in 2021. Revenue in 2020 was also higher in the first quarter of 2020 prior to the implementation of shelter-in-place orders and other travel restrictions across North America beginning March 2020.\\n\\nWe expect to see continued recovery in demand for our platform and the resulting positive impacts on revenue as there are more widespread immunity levels, more communities reopen and other restrictive travel and social distancing measures in response to COVID-19 are eased. 
However, we cannot predict the impact of COVID variants and the longer term impact of the pandemic on consumer behavior.\\n\\nCost of Revenue\\n\\n2021\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 493, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 63, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"chunk_id\": \"5425859b-cbfa-54e4-9729-5f92c6f61efc\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6301008528290666, \"text\": \"For additional discussion, see the risk factor titled \\u201c\\u2014If we are unable to attract or maintain a critical mass of Drivers, consumers, merchants, shippers, and carriers, whether as a result of competition or other factors, our platform will become less appealing to platform users, and our financial results would be adversely impacted.\\u201d included in Part I, Item 1A of this Annual Report on Form 10-K as well our 2021 ESG Report and our 2021 People and Culture Report. The information in these reports is not a part of this Form 10-K.\\n\\nAdditional Information\\n\\nWe were founded in 2009 and incorporated as Ubercab, Inc., a Delaware corporation, in July 2010. In February 2011, we changed our name to Uber\\n\\nTechnologies, Inc. Our principal executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.\\n\\n10\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 12, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"chunk_id\": \"9dae5d7c-4bcd-52f0-bdfc-a9e327c56069\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6285498210400674, \"text\": \"Uber Technologies, Inc. (\\u201cUber,\\u201d \\u201cwe,\\u201d \\u201cour,\\u201d or \\u201cus\\u201d) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. Uber is a technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B. Uber develops and operates proprietary technology applications supporting a variety of offerings on its platform (\\u201cplatform(s)\\u201d or \\u201cPlatform(s)\\u201d). Uber connects consumers (\\u201cRider(s)\\u201d) with independent providers of ride services (\\u201cMobility Driver(s)\\u201d) for ridesharing services, and connects Riders and other consumers (\\u201cEaters\\u201d) with restaurants, grocers and other stores (collectively, \\u201cMerchants\\u201d) with delivery service providers (\\u201cCouriers\\u201d) for meal preparation, grocery and other delivery services. 
Riders and Eaters are collectively referred to as \\u201cend-user(s)\\u201d or \\u201cconsumer(s).\\u201d Mobility Drivers and Couriers are collectively referred to as \\u201cDriver(s).\\u201d Uber also connects consumers with public\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 592, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 84, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"chunk_id\": \"9bba73a7-4ebf-51f2-8a55-553a93d2ac41\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.628432135926722, \"text\": \"Year Ended December 31, 2020\\n\\n1,000 49 1,189 (27) \\u2014 (138) (34) 8,939 (4) 3,824\\n\\n247 125 2,628 (527) (891) (224) 38 1,379 (92) (4,327)\\n\\n8,209 34 12,067 $\\n\\n12,067 (349) 7,391 $\\n\\n332 $ 133\\n\\n412 $ 82\\n\\n14,224 4,229 \\u2014 251 9 \\u2014 \\u2014\\n\\n\\u2014 \\u2014 \\u2014 196 3,898 171 1,634\\n\\n2021\\n\\n675 107 1,484 (27) (307) (226) 101 1,780 (69) 65\\n\\n7,391 349 7,805\\n\\n449 87\\n\\n\\u2014 \\u2014 232 184 1,868 1,018 \\u2014\\n\\nUBER TECHNOLOGIES, INC.\\n\\nNOTES TO CONSOLIDATED FINANCIAL STATEMENTS\\n\\nNote 1 \\u2013 Description of Business and Summary of Significant Accounting Policies\\n\\nDescription of Business\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 83, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}], \"graph_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-AEP09aw4hSfgVVU9Rl7pJxBUUidjO\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"Uber's profit in 2020 was not a profit but a net loss. The net loss attributable to Uber Technologies, Inc. 
for the year ended December 31, 2020, was $6,768 million [3].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1727996061, \"model\": \"gpt-4o-2024-08-06\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_e5e4913e83\", \"usage\": {\"completion_tokens\": 47, \"prompt_tokens\": 2320, \"total_tokens\": 2367, \"completion_tokens_details\": {\"audio_tokens\": null, \"reasoning_tokens\": 0}, \"prompt_tokens_details\": {\"audio_tokens\": null, \"cached_tokens\": 2176}}}}", "timestamp": "2024-10-03 22:54:22" }, { "key": "completion_record", - "value": "{\"message_id\": \"3b3d27e8-f949-52e2-85d0-00ac4709d44d\", \"message_type\": \"assistant\", \"timestamp\": \"2024-10-03T22:44:59.813045\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-10-03T22:54:22.800521\", \"completion_end_time\": \"2024-10-03T22:54:23.828972\", \"search_query\": \"Who is John Snow?\", \"search_results\": {\"vector_search_results\": [{\"extraction_id\": \"c08344bb-1740-5330-a6e1-00b558a0008c\", \"document_id\": \"e797da22-8c5d-54e5-bed5-a55954cf6bf9\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.20639122052297343, \"text\": \"\\n\\nAn NFT That Saves Lives\\n\\nMay 2021Noora Health, a nonprofit I've\\nsupported for years, just launched\\na new NFT. It has a dramatic name, Save Thousands of Lives,\\nbecause that's what the proceeds will do.Noora has been saving lives for 7 years. They run programs in\\nhospitals in South Asia to teach new mothers how to take care of\\ntheir babies once they get home. They're in 165 hospitals now. And\\nbecause they know the numbers before and after they start at a new\\nhospital, they can measure the impact they have. It is massive.\\nFor every 1000 live births, they save 9 babies.This number comes from a study\\nof 133,733 families at 28 different\\nhospitals that Noora conducted in collaboration with the Better\\nBirth team at Ariadne Labs, a joint center for health systems\\ninnovation at Brigham and Women\\u2019s Hospital and Harvard T.H. Chan\\nSchool of Public Health.Noora is so effective that even if you measure their costs in the\\nmost conservative way, by dividing their entire budget by the number\\nof lives saved, the cost of saving a life is the lowest I've seen.\\n$1,235.For this NFT, they're going to issue a public report tracking how\\nthis specific tranche of money is spent, and estimating the number\\nof lives saved as a result.NFTs are a new territory, and this way of using them is especially\\nnew, but I'm excited about its potential. And I'm excited to see\\nwhat happens with this particular auction, because unlike an NFT\\nrepresenting something that has already happened,\\nthis NFT gets better as the price gets higher.The reserve price was about $2.5 million, because that's what it\\ntakes for the name to be accurate: that's what it costs to save\\n2000 lives. But the higher the price of this NFT goes, the more\\nlives will be saved. 
What a sentence to be able to write.\\n\\n\\n \\n\\n\\n\\n \\n\\n\", \"metadata\": {\"version\": \"v0\", \"chunk_id\": 0, \"chunk_order\": 0, \"document_type\": \"html\", \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"extraction_id\": \"996675d0-381f-5b26-b4db-5dcc72babdc2\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.17490996867954944, \"text\": \"Shared and Shared Saver Rides enables unrelated parties traveling along similar routes to benefit from a discounted fare at the cost of possibly longer travel times. With a Shared or Shared Saver Ride, when the first rider requests a ride, our algorithms use the first rider\\u2019s destination and attempt to match them with other riders traveling along a similar route. If a match between riders is made, our algorithms re-route the driver to include the pick-up location of the matched rider on the active route. For Shared and Shared Saver Rides, drivers earn a fixed amount based on a number of factors, including the time and distance of the ride, the base fare charged to riders and the level of rider demand. We determine the rider fare based on the predicted time and distance of the ride, the level of rider demand and the likelihood of being able to match additional riders along the given route, and such fare is quoted to the riders prior to their commitment to the ride. The fare charged to the riders is decoupled\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 276, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 36, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"extraction_id\": \"2ff890d6-cb3f-5c17-88c0-5194b98ba56e\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.16959259872524757, \"text\": \"s, drivers, and the communities they serve.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"extraction_id\": \"b5e169c0-9779-5e30-a644-7bdf8308d8a5\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.16769101216250615, \"text\": \"Our Proprietary Data-Driven Technology Platform\\n\\nOur robust technology platform powers the millions of rides and connections that we facilitate every day and provides insights that drive our platform in real-time. We leverage historical data to continuously improve experiences for drivers and riders on our platform. Our platform analyzes large datasets covering the ride lifecycle, from when drivers go online and riders request rides, to when they match, which route to take and any feedback given after the rides. 
Utilizing machine learning capabilities to predict future behavior based on many years of historical data and use cases, we employ various levers to balance supply and demand in the marketplace, creating increased driver earnings while maintaining strong service levels for riders. We also leverage our data science and algorithms to inform our product development.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 42, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 8, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"extraction_id\": \"2d5c5f3b-571b-5a4a-a8ce-e07922823f78\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.16550283637625984, \"text\": \"Several Swiss administrative bodies have issued decisions in which they classify Drivers as employees of Uber Switzerland, Rasier Operations B.V. or of Uber B.V. for social security or regulatory purposes. We are challenging each of them before the Social Security and Administrative Tribunals. In April 2021, a ruling was made that Uber Switzerland could not be held liable for social security contributions. The litigations with regards to Uber B.V. and Raiser Operations B.V. are still pending for years 2014 to 2019. In January 2022, the Social Security Tribunal of Zurich reclassified drivers who have used the App in 2014 as dependent workers of Uber BV and Rasier Operations BV from a social security standpoint, but this ruling has been appealed before the Federal Tribunal and has no impact on our current operations. The ultimate resolution of the social security matters for the other two entities is uncertain and the amount accrued for this matter is recorded within accrued and other current liabilities on the\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 855, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 130, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"extraction_id\": \"d0449e9c-80cb-5873-bb89-ada360f473cf\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.15934575684717212, \"text\": \"Universal Vaccine Access Campaign - mobilizes a coalition of partners to provide rides to and from COVID-19 vaccination sites for low-income, underinsured and at-risk communities;\\n\\nDisaster Response - provides rides to access vital services both leading up to and in the wake of disasters and other local emergencies when roads are safe to do so; and\\n\\nVoting Access - provides rides to the polls during Federal elections, with a focus on supporting individuals who traditionally face barriers to voting, such as seniors, veterans and communities of color.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 80, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"extraction_id\": \"123a19db-e2ed-5112-9fbd-19afb707ffcb\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": 
\"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.1561906933784496, \"text\": \"COVID-19\\n\\nIn March 2020, the World Health Organization declared the outbreak of coronavirus (\\u201cCOVID-19\\u201d) a pandemic. The COVID-19 pandemic has rapidly changed market and economic conditions globally, impacting Drivers, Merchants, consumers and business partners, as well as our business, results of operations, financial position, and cash flows. Various governmental restrictions, including the declaration of a federal National Emergency, multiple cities\\u2019 and states\\u2019 declarations of states of emergency, school and business closings, quarantines, restrictions on travel, limitations on social or public gatherings, and other measures have, and may continue to have, an adverse impact on our business and operations, including, for example, by reducing the global demand for Mobility rides. Furthermore, we are experiencing and expect to continue to experience Driver supply constraints, and such supply constraints have been and may continue to be impacted by concerns regarding the COVID-19 pandemic.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 426, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 51, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"extraction_id\": \"00983240-785a-5f53-ba0c-2f848e6f29bd\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.1539415625368621, \"text\": \"nsumers with public transportation networks. Uber uses this same network, technology, operational excellence and product expertise to connect shippers with carriers in the freight industry. 
Uber is also developing technologies that will provide new solutions to solve everyday problems.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 593, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 84, \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"extraction_id\": \"37b90a06-bb7d-5146-b667-18f63610ad8c\", \"document_id\": \"d421207a-d799-5806-8d67-46b2005b15d4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.15079258666171103, \"text\": \"https://www.ycombinator.com/companies/watsi\\n\\nhttps://www.ycombinator.com/companies/movley\\n\\nhttps://www.ycombinator.com/companies/heypurple\\n\\nhttps://www.ycombinator.com/companies/pointhound\\n\\nhttps://www.ycombinator.com/companies/reworkd\\n\\nhttps://www.ycombinator.com/companies/shoobs\\n\\nhttps://www.ycombinator.com/companies/strada\\n\\nhttps://www.ycombinator.com/companies/sweep\\n\\nhttps://www.ycombinator.com/companies/terminal\\n\\nhttps://www.ycombinator.com/companies/sante\\n\\nhttps://www.ycombinator.com/companies/sprx\\n\\nhttps://www.ycombinator.com/companies/sails-co\\n\\nhttps://www.ycombinator.com/companies/dyspatch\\n\\nhttps://www.ycombinator.com/companies/orbio-earth\\n\\nhttps://www.ycombinator.com/companies/epsilon\\n\\nhttps://www.ycombinator.com/companies/new-story\\n\\nhttps://www.ycombinator.com/companies/hatchet-2\\n\\nhttps://www.ycombinator.com/companies/epsilla\\n\\nhttps://www.ycombinator.com/companies/resend\\n\\nhttps://www.ycombinator.com/companies/teamnote\\n\\nhttps://www.ycombinator.com/companies/thread-2\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 19, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"extraction_id\": \"c775cf38-8737-59e8-96fc-4e403041eade\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.1487142056993126, \"text\": \"COVID-19 Response Initiatives\\n\\nWe continue to prioritize the health and safety of our consumers, Drivers and Merchants, our employees and the communities we serve and continue to believe we will play an important role in the economic recovery of cities around the globe. We are focused on navigating the challenges presented by COVID-19 through preserving our liquidity and managing our cash flow by taking preemptive action to enhance our ability to meet our short-term liquidity needs. The pandemic has reduced the demand for our Mobility offering globally, while accelerating the growth of our Delivery offerings. 
We have responded to the COVID-19 pandemic by launching new, or expanding existing, services or features on an expedited basis, particularly those related to delivery of food and other goods.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 427, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 51, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-AEP0BjYF7baSJqZHlIb8v7SK3o0hs\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"The provided context does not contain any information about John Snow. Therefore, I am unable to provide an answer based on the given context.\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1727996063, \"model\": \"gpt-4o-2024-08-06\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_e5e4913e83\", \"usage\": {\"completion_tokens\": 27, \"prompt_tokens\": 1904, \"total_tokens\": 1931, \"completion_tokens_details\": {\"audio_tokens\": null, \"reasoning_tokens\": 0}, \"prompt_tokens_details\": {\"audio_tokens\": null, \"cached_tokens\": 1664}}}}", + "value": "{\"message_id\": \"3b3d27e8-f949-52e2-85d0-00ac4709d44d\", \"message_type\": \"assistant\", \"timestamp\": \"2024-10-03T22:44:59.813045\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-10-03T22:54:22.800521\", \"completion_end_time\": \"2024-10-03T22:54:23.828972\", \"search_query\": \"Who is John Snow?\", \"search_results\": {\"vector_search_results\": [{\"chunk_id\": \"c08344bb-1740-5330-a6e1-00b558a0008c\", \"document_id\": \"e797da22-8c5d-54e5-bed5-a55954cf6bf9\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.20639122052297343, \"text\": \"\\n\\nAn NFT That Saves Lives\\n\\nMay 2021Noora Health, a nonprofit I've\\nsupported for years, just launched\\na new NFT. It has a dramatic name, Save Thousands of Lives,\\nbecause that's what the proceeds will do.Noora has been saving lives for 7 years. They run programs in\\nhospitals in South Asia to teach new mothers how to take care of\\ntheir babies once they get home. They're in 165 hospitals now. And\\nbecause they know the numbers before and after they start at a new\\nhospital, they can measure the impact they have. It is massive.\\nFor every 1000 live births, they save 9 babies.This number comes from a study\\nof 133,733 families at 28 different\\nhospitals that Noora conducted in collaboration with the Better\\nBirth team at Ariadne Labs, a joint center for health systems\\ninnovation at Brigham and Women\\u2019s Hospital and Harvard T.H. Chan\\nSchool of Public Health.Noora is so effective that even if you measure their costs in the\\nmost conservative way, by dividing their entire budget by the number\\nof lives saved, the cost of saving a life is the lowest I've seen.\\n$1,235.For this NFT, they're going to issue a public report tracking how\\nthis specific tranche of money is spent, and estimating the number\\nof lives saved as a result.NFTs are a new territory, and this way of using them is especially\\nnew, but I'm excited about its potential. 
And I'm excited to see\\nwhat happens with this particular auction, because unlike an NFT\\nrepresenting something that has already happened,\\nthis NFT gets better as the price gets higher.The reserve price was about $2.5 million, because that's what it\\ntakes for the name to be accurate: that's what it costs to save\\n2000 lives. But the higher the price of this NFT goes, the more\\nlives will be saved. What a sentence to be able to write.\\n\\n\\n \\n\\n\\n\\n \\n\\n\", \"metadata\": {\"version\": \"v0\", \"chunk_id\": 0, \"chunk_order\": 0, \"document_type\": \"html\", \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"chunk_id\": \"996675d0-381f-5b26-b4db-5dcc72babdc2\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.17490996867954944, \"text\": \"Shared and Shared Saver Rides enables unrelated parties traveling along similar routes to benefit from a discounted fare at the cost of possibly longer travel times. With a Shared or Shared Saver Ride, when the first rider requests a ride, our algorithms use the first rider\\u2019s destination and attempt to match them with other riders traveling along a similar route. If a match between riders is made, our algorithms re-route the driver to include the pick-up location of the matched rider on the active route. For Shared and Shared Saver Rides, drivers earn a fixed amount based on a number of factors, including the time and distance of the ride, the base fare charged to riders and the level of rider demand. We determine the rider fare based on the predicted time and distance of the ride, the level of rider demand and the likelihood of being able to match additional riders along the given route, and such fare is quoted to the riders prior to their commitment to the ride. The fare charged to the riders is decoupled\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 276, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 36, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"chunk_id\": \"2ff890d6-cb3f-5c17-88c0-5194b98ba56e\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.16959259872524757, \"text\": \"s, drivers, and the communities they serve.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"chunk_id\": \"b5e169c0-9779-5e30-a644-7bdf8308d8a5\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.16769101216250615, \"text\": \"Our Proprietary Data-Driven Technology Platform\\n\\nOur robust technology platform powers the millions of rides and connections that we facilitate every day and provides insights that drive our platform in real-time. We leverage historical data to continuously improve experiences for drivers and riders on our platform. 
Our platform analyzes large datasets covering the ride lifecycle, from when drivers go online and riders request rides, to when they match, which route to take and any feedback given after the rides. Utilizing machine learning capabilities to predict future behavior based on many years of historical data and use cases, we employ various levers to balance supply and demand in the marketplace, creating increased driver earnings while maintaining strong service levels for riders. We also leverage our data science and algorithms to inform our product development.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 42, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 8, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"chunk_id\": \"2d5c5f3b-571b-5a4a-a8ce-e07922823f78\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.16550283637625984, \"text\": \"Several Swiss administrative bodies have issued decisions in which they classify Drivers as employees of Uber Switzerland, Rasier Operations B.V. or of Uber B.V. for social security or regulatory purposes. We are challenging each of them before the Social Security and Administrative Tribunals. In April 2021, a ruling was made that Uber Switzerland could not be held liable for social security contributions. The litigations with regards to Uber B.V. and Raiser Operations B.V. are still pending for years 2014 to 2019. In January 2022, the Social Security Tribunal of Zurich reclassified drivers who have used the App in 2014 as dependent workers of Uber BV and Rasier Operations BV from a social security standpoint, but this ruling has been appealed before the Federal Tribunal and has no impact on our current operations. 
The ultimate resolution of the social security matters for the other two entities is uncertain and the amount accrued for this matter is recorded within accrued and other current liabilities on the\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 855, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 130, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"chunk_id\": \"d0449e9c-80cb-5873-bb89-ada360f473cf\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.15934575684717212, \"text\": \"Universal Vaccine Access Campaign - mobilizes a coalition of partners to provide rides to and from COVID-19 vaccination sites for low-income, underinsured and at-risk communities;\\n\\nDisaster Response - provides rides to access vital services both leading up to and in the wake of disasters and other local emergencies when roads are safe to do so; and\\n\\nVoting Access - provides rides to the polls during Federal elections, with a focus on supporting individuals who traditionally face barriers to voting, such as seniors, veterans and communities of color.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 80, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"chunk_id\": \"123a19db-e2ed-5112-9fbd-19afb707ffcb\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.1561906933784496, \"text\": \"COVID-19\\n\\nIn March 2020, the World Health Organization declared the outbreak of coronavirus (\\u201cCOVID-19\\u201d) a pandemic. The COVID-19 pandemic has rapidly changed market and economic conditions globally, impacting Drivers, Merchants, consumers and business partners, as well as our business, results of operations, financial position, and cash flows. Various governmental restrictions, including the declaration of a federal National Emergency, multiple cities\\u2019 and states\\u2019 declarations of states of emergency, school and business closings, quarantines, restrictions on travel, limitations on social or public gatherings, and other measures have, and may continue to have, an adverse impact on our business and operations, including, for example, by reducing the global demand for Mobility rides. 
Furthermore, we are experiencing and expect to continue to experience Driver supply constraints, and such supply constraints have been and may continue to be impacted by concerns regarding the COVID-19 pandemic.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 426, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 51, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"chunk_id\": \"00983240-785a-5f53-ba0c-2f848e6f29bd\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.1539415625368621, \"text\": \"nsumers with public transportation networks. Uber uses this same network, technology, operational excellence and product expertise to connect shippers with carriers in the freight industry. Uber is also developing technologies that will provide new solutions to solve everyday problems.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 593, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 84, \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"chunk_id\": \"37b90a06-bb7d-5146-b667-18f63610ad8c\", \"document_id\": \"d421207a-d799-5806-8d67-46b2005b15d4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.15079258666171103, \"text\": \"https://www.ycombinator.com/companies/watsi\\n\\nhttps://www.ycombinator.com/companies/movley\\n\\nhttps://www.ycombinator.com/companies/heypurple\\n\\nhttps://www.ycombinator.com/companies/pointhound\\n\\nhttps://www.ycombinator.com/companies/reworkd\\n\\nhttps://www.ycombinator.com/companies/shoobs\\n\\nhttps://www.ycombinator.com/companies/strada\\n\\nhttps://www.ycombinator.com/companies/sweep\\n\\nhttps://www.ycombinator.com/companies/terminal\\n\\nhttps://www.ycombinator.com/companies/sante\\n\\nhttps://www.ycombinator.com/companies/sprx\\n\\nhttps://www.ycombinator.com/companies/sails-co\\n\\nhttps://www.ycombinator.com/companies/dyspatch\\n\\nhttps://www.ycombinator.com/companies/orbio-earth\\n\\nhttps://www.ycombinator.com/companies/epsilon\\n\\nhttps://www.ycombinator.com/companies/new-story\\n\\nhttps://www.ycombinator.com/companies/hatchet-2\\n\\nhttps://www.ycombinator.com/companies/epsilla\\n\\nhttps://www.ycombinator.com/companies/resend\\n\\nhttps://www.ycombinator.com/companies/teamnote\\n\\nhttps://www.ycombinator.com/companies/thread-2\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 19, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"chunk_id\": \"c775cf38-8737-59e8-96fc-4e403041eade\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.1487142056993126, \"text\": \"COVID-19 Response Initiatives\\n\\nWe continue to prioritize the health and safety of our consumers, Drivers and Merchants, our employees and the communities we serve and continue to believe we will play an important role in the economic recovery of cities 
around the globe. We are focused on navigating the challenges presented by COVID-19 through preserving our liquidity and managing our cash flow by taking preemptive action to enhance our ability to meet our short-term liquidity needs. The pandemic has reduced the demand for our Mobility offering globally, while accelerating the growth of our Delivery offerings. We have responded to the COVID-19 pandemic by launching new, or expanding existing, services or features on an expedited basis, particularly those related to delivery of food and other goods.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 427, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 51, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}], \"graph_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-AEP0BjYF7baSJqZHlIb8v7SK3o0hs\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"The provided context does not contain any information about John Snow. Therefore, I am unable to provide an answer based on the given context.\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1727996063, \"model\": \"gpt-4o-2024-08-06\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_e5e4913e83\", \"usage\": {\"completion_tokens\": 27, \"prompt_tokens\": 1904, \"total_tokens\": 1931, \"completion_tokens_details\": {\"audio_tokens\": null, \"reasoning_tokens\": 0}, \"prompt_tokens_details\": {\"audio_tokens\": null, \"cached_tokens\": 1664}}}}", "timestamp": "2024-10-03 22:54:23" }, { diff --git a/py/tests/regression/observed_outputs/test_retrieval.json b/py/tests/regression/observed_outputs/test_retrieval.json index 5cf5ed428..60f097cbc 100644 --- a/py/tests/regression/observed_outputs/test_retrieval.json +++ b/py/tests/regression/observed_outputs/test_retrieval.json @@ -3,7 +3,7 @@ "results": { "vector_search_results": [ { - "extraction_id": "0484dba9-2b51-5012-9aad-e6efe7e6688f", + "chunk_id": "0484dba9-2b51-5012-9aad-e6efe7e6688f", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -25,7 +25,7 @@ } }, { - "extraction_id": "afa9d545-b0fb-57d2-aa8e-47b874b5671e", + "chunk_id": "afa9d545-b0fb-57d2-aa8e-47b874b5671e", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -47,7 +47,7 @@ } }, { - "extraction_id": "57a92100-4201-5909-8794-229f3f111cf9", + "chunk_id": "57a92100-4201-5909-8794-229f3f111cf9", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -69,7 +69,7 @@ } }, { - "extraction_id": "0621a428-a8a1-505f-81c0-b7d6daceda9a", + "chunk_id": "0621a428-a8a1-505f-81c0-b7d6daceda9a", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -91,7 +91,7 @@ } }, { - "extraction_id": "1163974b-141e-50b7-8d19-d8d3d9143410", + "chunk_id": "1163974b-141e-50b7-8d19-d8d3d9143410", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -113,7 +113,7 @@ } }, { - "extraction_id": "a2660071-661b-5928-ad52-c4106ea95ae9", + "chunk_id": "a2660071-661b-5928-ad52-c4106ea95ae9", "document_id": 
"3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -135,7 +135,7 @@ } }, { - "extraction_id": "8e93af52-b1cd-5c64-afa4-e3a7fcdf412b", + "chunk_id": "8e93af52-b1cd-5c64-afa4-e3a7fcdf412b", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -157,7 +157,7 @@ } }, { - "extraction_id": "ccacb15d-aef0-5143-b448-380401c71cd1", + "chunk_id": "ccacb15d-aef0-5143-b448-380401c71cd1", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -179,7 +179,7 @@ } }, { - "extraction_id": "6a340f36-ef68-59dd-b8a7-a5f5d6cd6d00", + "chunk_id": "6a340f36-ef68-59dd-b8a7-a5f5d6cd6d00", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -201,7 +201,7 @@ } }, { - "extraction_id": "0a7e6a54-3804-5b5b-a9cd-a5b4a4753483", + "chunk_id": "0a7e6a54-3804-5b5b-a9cd-a5b4a4753483", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -223,7 +223,7 @@ } } ], - "kg_search_results": null + "graph_search_results": null } }, "basic_rag": { @@ -266,7 +266,7 @@ "search_results": { "vector_search_results": [ { - "extraction_id": "328e5142-bd6c-5553-b5a0-8fdbd72ee6c6", + "chunk_id": "328e5142-bd6c-5553-b5a0-8fdbd72ee6c6", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -288,7 +288,7 @@ } }, { - "extraction_id": "a0b5c2f6-7dcd-5865-b2c6-0b3cd2189e57", + "chunk_id": "a0b5c2f6-7dcd-5865-b2c6-0b3cd2189e57", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -310,7 +310,7 @@ } }, { - "extraction_id": "500bf649-b2a8-521b-bdb2-78cdc342531f", + "chunk_id": "500bf649-b2a8-521b-bdb2-78cdc342531f", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -332,7 +332,7 @@ } }, { - "extraction_id": "90b1f17b-a97f-5552-9951-fbc6df634039", + "chunk_id": "90b1f17b-a97f-5552-9951-fbc6df634039", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -354,7 +354,7 @@ } }, { - "extraction_id": "845a2b04-70ee-5a70-91fa-44016677fd92", + "chunk_id": "845a2b04-70ee-5a70-91fa-44016677fd92", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -376,7 +376,7 @@ } }, { - "extraction_id": "1739d713-3fb6-534f-8ddb-7ff9cd6484c7", + "chunk_id": "1739d713-3fb6-534f-8ddb-7ff9cd6484c7", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -398,7 +398,7 @@ } }, { - "extraction_id": "70e9089c-56e0-52f7-80ea-ad66fe1f9a79", + "chunk_id": "70e9089c-56e0-52f7-80ea-ad66fe1f9a79", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -420,7 +420,7 @@ } }, { - "extraction_id": "5425859b-cbfa-54e4-9729-5f92c6f61efc", + "chunk_id": "5425859b-cbfa-54e4-9729-5f92c6f61efc", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -442,7 +442,7 @@ } }, { - "extraction_id": "9dae5d7c-4bcd-52f0-bdfc-a9e327c56069", + "chunk_id": 
"9dae5d7c-4bcd-52f0-bdfc-a9e327c56069", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -464,7 +464,7 @@ } }, { - "extraction_id": "9bba73a7-4ebf-51f2-8a55-553a93d2ac41", + "chunk_id": "9bba73a7-4ebf-51f2-8a55-553a93d2ac41", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -486,7 +486,7 @@ } } ], - "kg_search_results": null + "graph_search_results": null } } }, @@ -530,7 +530,7 @@ "search_results": { "vector_search_results": [ { - "extraction_id": "c08344bb-1740-5330-a6e1-00b558a0008c", + "chunk_id": "c08344bb-1740-5330-a6e1-00b558a0008c", "document_id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -548,7 +548,7 @@ } }, { - "extraction_id": "996675d0-381f-5b26-b4db-5dcc72babdc2", + "chunk_id": "996675d0-381f-5b26-b4db-5dcc72babdc2", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -570,7 +570,7 @@ } }, { - "extraction_id": "2ff890d6-cb3f-5c17-88c0-5194b98ba56e", + "chunk_id": "2ff890d6-cb3f-5c17-88c0-5194b98ba56e", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -593,7 +593,7 @@ } }, { - "extraction_id": "b5e169c0-9779-5e30-a644-7bdf8308d8a5", + "chunk_id": "b5e169c0-9779-5e30-a644-7bdf8308d8a5", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -615,7 +615,7 @@ } }, { - "extraction_id": "2d5c5f3b-571b-5a4a-a8ce-e07922823f78", + "chunk_id": "2d5c5f3b-571b-5a4a-a8ce-e07922823f78", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -637,7 +637,7 @@ } }, { - "extraction_id": "d0449e9c-80cb-5873-bb89-ada360f473cf", + "chunk_id": "d0449e9c-80cb-5873-bb89-ada360f473cf", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -659,7 +659,7 @@ } }, { - "extraction_id": "123a19db-e2ed-5112-9fbd-19afb707ffcb", + "chunk_id": "123a19db-e2ed-5112-9fbd-19afb707ffcb", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -681,7 +681,7 @@ } }, { - "extraction_id": "00983240-785a-5f53-ba0c-2f848e6f29bd", + "chunk_id": "00983240-785a-5f53-ba0c-2f848e6f29bd", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -704,7 +704,7 @@ } }, { - "extraction_id": "37b90a06-bb7d-5146-b667-18f63610ad8c", + "chunk_id": "37b90a06-bb7d-5146-b667-18f63610ad8c", "document_id": "d421207a-d799-5806-8d67-46b2005b15d4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -725,7 +725,7 @@ } }, { - "extraction_id": "c775cf38-8737-59e8-96fc-4e403041eade", + "chunk_id": "c775cf38-8737-59e8-96fc-4e403041eade", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "collection_ids": [ @@ -747,7 +747,7 @@ } } ], - "kg_search_results": null + "graph_search_results": null } } }, @@ -757,7 +757,7 @@ "choices": [ { "message": { - "content": "[{\"extraction_id\": \"31e7a71c-0f89-5b27-972e-89bb8eb1415a\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": 
\"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6675316889565188, \"text\": \"Total Stockholders\\u2019 Equity (Deficit) 1,676,163\\n\\n5,184\\n\\n\\u2014\\n\\n(26,298)\\n\\n28,637\\n\\n(1) 721,710 (2,038) (1,009,359) 1,393,998\\n\\nLyft, Inc. Consolidated Statements of Cash Flows (in thousands)\\n\\n2021\\n\\nCash flows from operating activities Net loss Adjustments to reconcile net loss to net cash used in operating activities\\n\\n$\\n\\n(1,009,359)\\n\\nDepreciation and amortization Stock-based compensation Amortization of premium on marketable securities Accretion of discount on marketable securities Amortization of debt discount and issuance costs Deferred income tax from convertible senior notes Loss on sale and disposal of assets, net Gain on divestiture Other Changes in operating assets and liabilities, net effects of acquisition\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 572, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 82, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"ab62fbab-c5f8-5b3d-ab2e-2484c77c81fb\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6647442183050991, \"text\": \"79\\n\\n2019 3,615,960\\n\\n2,176,469 636,116 1,505,640 814,122 1,186,093 6,318,440 (2,702,480) \\u2014 102,595 (2,599,885) 2,356 (2,602,241)\\n\\n(11.44)\\n\\n227,498\\n\\n81,321 75,212 971,941 72,046 398,791\\n\\nLyft, Inc. Consolidated Statements of Comprehensive Loss (in thousands)\\n\\nNet loss Other comprehensive income (loss)\\n\\n$\\n\\nYear Ended December 31, 2020 (1,752,857) $\\n\\n2021 (1,009,359) $\\n\\nForeign currency translation adjustment Unrealized gain (loss) on marketable securities, net of taxes\\n\\nOther comprehensive income (loss)\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 567, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 79, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"66efee73-7df1-5786-b56b-3b0a6f9bf390\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6629626355789533, \"text\": \"Overview\\n\\nLyft, Inc (the \\u201cCompany\\u201d or \\u201cLyft\\u201d) started a movement to revolutionize transportation. In 2012, we launched our peer-to-peer marketplace for on-demand ridesharing and have continued to pioneer innovations aligned with our mission. 
Today, Lyft is one of the largest multimodal transportation networks in the United States and Canada.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 16, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"f043e24f-5d5d-531e-973a-277e65f3b10e\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6602276170118095, \"text\": \"Revenues from Contracts with Customers (ASC 606)\\n\\nWe generate substantially all our revenue from our ridesharing marketplace that connects drivers and riders. We recognize revenue from fees paid by drivers for use of our Lyft Platform offerings in accordance with ASC 606 as described in Note 2 of the notes to our consolidated financial statements. Drivers enter into terms of service (\\u201cToS\\u201d) with us in order to use our Lyft Driver App.\\n\\n58\\n\\n2019 to 2020 % Change\\n\\n19.0% (1.8)% (6.7)% 2.3%\\n\\nWe provide a service to drivers to complete a successful transportation service for riders. This service includes on-demand lead generation that assists drivers to find, receive and fulfill on-demand requests from riders seeking transportation services and related collection activities using our Lyft Platform. As a result, our single performance obligation in the transaction is to connect drivers with riders to facilitate the completion of a successful transportation service for riders.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 459, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 58, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"095cb246-80ec-5c35-96b4-ba902851e0e7\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6476712260834182, \"text\": \"Corporate Information\\n\\nWe were incorporated in 2007 as Bounder Web, Inc., a Delaware corporation. In 2008, we changed our name to Zimride, Inc. We founded Lyft in 2012 and\\n\\nchanged our name to Lyft, Inc. in 2013 when we sold the assets related to our Zimride operations.\\n\\n13\\n\\nAvailable Information\\n\\nOur website is located at www.lyft.com, and our investor relations website is located at investor.lyft.com. Copies of our Annual Report on Form 10-K, Quarterly Reports on Form 10-Q, Current Reports on Form 8-K and amendments to these reports filed or furnished pursuant to Section 13(a) or 15(d) of the Exchange Act, as amended, are available free of charge on our investor relations website as soon as reasonably practicable after we file such material electronically with or furnish it to the Securities and Exchange Commission (the \\u201cSEC\\u201d). 
The SEC also maintains a website that contains our SEC filings at www.sec.gov.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 82, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"0da7d65c-a0e7-541f-a404-71f32346d988\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6405097674715796, \"text\": \"We generate substantially all of our revenue from our ridesharing marketplace that connects drivers and riders. We collect service fees and commissions from drivers for their use of our ridesharing marketplace. As drivers accept more rider leads and complete more rides, we earn more revenue. We also generate revenue from riders renting Light Vehicles, drivers renting vehicles through Express Drive, Lyft Rentals renters, Lyft Driver Center and Lyft Auto Care users, and by making our ridesharing marketplace available to organizations through our Lyft Business offerings, such as our Concierge and Corporate Business Travel programs. In the second quarter of 2021, we began generating revenues from licensing and data access agreements, primarily with third-party autonomous vehicle companies.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 20, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"f7cdb289-ca94-5e40-909d-7a01a8a5d378\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6394687509853404, \"text\": \"Revenue Recognition\\n\\nThe Company generates its revenue from its multimodal transportation networks that offer access to a variety of transportation options through the Lyft Platform and mobile-based applications. Substantially all of the Company\\u2019s revenue is generated from its ridesharing marketplace that connects drivers and riders and is recognized in accordance with Accounting Standards Codification Topic 606 (\\u201cASC 606\\u201d). In addition, the Company generates revenue in accordance with ASC 606 from licensing and data access, primarily with third-party autonomous vehicle companies. 
The Company also generates rental revenue from Flexdrive, its network of Light Vehicles and Lyft Rentals, which is recognized in accordance with Accounting Standards Codification Topic 842 (\\u201cASC 842\\u201d).\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 86, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"14c06942-0f82-5a5f-9936-03919f6dac96\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6308834176729083, \"text\": \"Light Vehicle Rider and Lyft Rentals Renter Incentives\\n\\nIncentives offered to Light Vehicle riders and Lyft Rentals renters were not material for the years ended December 31, 2021 and 2020.\\n\\nFor the years ended December 31, 2021, 2020 and 2019, in relation to the driver, rider, Light Vehicle riders and Lyft Rentals renters incentive programs, the Company recorded $1.3 billion, $390.8 million and $560.3 million as a reduction to revenue and $64.7 million, $135.0 million and $381.5 million as sales and marketing expense, respectively.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 611, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 89, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"e0f5bd80-c3d2-58d6-a310-e04fa1618a5a\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6294196468937618, \"text\": \"Software Development Costs\\n\\nThe Company incurs costs related to developing the Lyft Platform and related support systems. The Company capitalizes development costs related to the Lyft Platform and related support systems once the preliminary project stage is complete and it is probable that the project will be completed and the software will be used to perform the function intended. The Company capitalized $16.2 million and $12.8 million of software development costs during the year ended December 31, 2021 and 2020, respectively. For the year ended December 31, 2019, capitalized software development costs was not material.\\n\\nInsurance Reserves\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 649, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 94, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"extraction_id\": \"ab302f43-5dcf-5e04-82ee-754565cd1cda\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6209709459253888, \"text\": \"32.1\\u2020\\n\\nCertifications of Principal Executive Officer and Principal Financial Officer pursuant to 18 U.S.C. 
Section 1350, as adopted pursuant to Section 906 of the Sarbanes-Oxley Act of 2002.\\n\\n101\\n\\nThe following financial information from Lyft, Inc.\\u2019s Annual Report on Form 10-K for the fiscal year ended December 31, 2021 formatted in Inline XBRL (eXtensible Business Reporting Language): (i) Consolidated Statements of Operations for the fiscal years ended December 31, 2021, 2020 and 2019; (ii) Consolidated Statements of Comprehensive Income (Loss) for the fiscal years ended December 31, 2021, 2020, and 2019; (iii) Consolidated Balance Sheets as of December 31, 2021 and 2020; (iv) Consolidated Statements of Cash Flows for the fiscal years ended December 31, 2021, 2020, and 2019; (v) Consolidated Statements of Redeemable Convertible Preferred Stock and Stockholders\\u2019 Equity for the fiscal years ended December 31, 2021, 2020, and 2019; and (vi) Notes to the Consolidated Financial Statements.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 817, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 127, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}]Lyft's profit in 2020 was not a profit but a net loss. According to the provided context, Lyft reported a net loss of $1,752,857,000 for the year ended December 31, 2020 [2]." + "content": "[{\"chunk_id\": \"31e7a71c-0f89-5b27-972e-89bb8eb1415a\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6675316889565188, \"text\": \"Total Stockholders\\u2019 Equity (Deficit) 1,676,163\\n\\n5,184\\n\\n\\u2014\\n\\n(26,298)\\n\\n28,637\\n\\n(1) 721,710 (2,038) (1,009,359) 1,393,998\\n\\nLyft, Inc. Consolidated Statements of Cash Flows (in thousands)\\n\\n2021\\n\\nCash flows from operating activities Net loss Adjustments to reconcile net loss to net cash used in operating activities\\n\\n$\\n\\n(1,009,359)\\n\\nDepreciation and amortization Stock-based compensation Amortization of premium on marketable securities Accretion of discount on marketable securities Amortization of debt discount and issuance costs Deferred income tax from convertible senior notes Loss on sale and disposal of assets, net Gain on divestiture Other Changes in operating assets and liabilities, net effects of acquisition\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 572, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 82, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"chunk_id\": \"ab62fbab-c5f8-5b3d-ab2e-2484c77c81fb\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6647442183050991, \"text\": \"79\\n\\n2019 3,615,960\\n\\n2,176,469 636,116 1,505,640 814,122 1,186,093 6,318,440 (2,702,480) \\u2014 102,595 (2,599,885) 2,356 (2,602,241)\\n\\n(11.44)\\n\\n227,498\\n\\n81,321 75,212 971,941 72,046 398,791\\n\\nLyft, Inc. 
Consolidated Statements of Comprehensive Loss (in thousands)\\n\\nNet loss Other comprehensive income (loss)\\n\\n$\\n\\nYear Ended December 31, 2020 (1,752,857) $\\n\\n2021 (1,009,359) $\\n\\nForeign currency translation adjustment Unrealized gain (loss) on marketable securities, net of taxes\\n\\nOther comprehensive income (loss)\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 567, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 79, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"chunk_id\": \"66efee73-7df1-5786-b56b-3b0a6f9bf390\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6629626355789533, \"text\": \"Overview\\n\\nLyft, Inc (the \\u201cCompany\\u201d or \\u201cLyft\\u201d) started a movement to revolutionize transportation. In 2012, we launched our peer-to-peer marketplace for on-demand ridesharing and have continued to pioneer innovations aligned with our mission. Today, Lyft is one of the largest multimodal transportation networks in the United States and Canada.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 16, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"chunk_id\": \"f043e24f-5d5d-531e-973a-277e65f3b10e\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6602276170118095, \"text\": \"Revenues from Contracts with Customers (ASC 606)\\n\\nWe generate substantially all our revenue from our ridesharing marketplace that connects drivers and riders. We recognize revenue from fees paid by drivers for use of our Lyft Platform offerings in accordance with ASC 606 as described in Note 2 of the notes to our consolidated financial statements. Drivers enter into terms of service (\\u201cToS\\u201d) with us in order to use our Lyft Driver App.\\n\\n58\\n\\n2019 to 2020 % Change\\n\\n19.0% (1.8)% (6.7)% 2.3%\\n\\nWe provide a service to drivers to complete a successful transportation service for riders. This service includes on-demand lead generation that assists drivers to find, receive and fulfill on-demand requests from riders seeking transportation services and related collection activities using our Lyft Platform. 
As a result, our single performance obligation in the transaction is to connect drivers with riders to facilitate the completion of a successful transportation service for riders.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 459, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 58, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"chunk_id\": \"095cb246-80ec-5c35-96b4-ba902851e0e7\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6476712260834182, \"text\": \"Corporate Information\\n\\nWe were incorporated in 2007 as Bounder Web, Inc., a Delaware corporation. In 2008, we changed our name to Zimride, Inc. We founded Lyft in 2012 and\\n\\nchanged our name to Lyft, Inc. in 2013 when we sold the assets related to our Zimride operations.\\n\\n13\\n\\nAvailable Information\\n\\nOur website is located at www.lyft.com, and our investor relations website is located at investor.lyft.com. Copies of our Annual Report on Form 10-K, Quarterly Reports on Form 10-Q, Current Reports on Form 8-K and amendments to these reports filed or furnished pursuant to Section 13(a) or 15(d) of the Exchange Act, as amended, are available free of charge on our investor relations website as soon as reasonably practicable after we file such material electronically with or furnish it to the Securities and Exchange Commission (the \\u201cSEC\\u201d). The SEC also maintains a website that contains our SEC filings at www.sec.gov.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 82, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"chunk_id\": \"0da7d65c-a0e7-541f-a404-71f32346d988\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6405097674715796, \"text\": \"We generate substantially all of our revenue from our ridesharing marketplace that connects drivers and riders. We collect service fees and commissions from drivers for their use of our ridesharing marketplace. As drivers accept more rider leads and complete more rides, we earn more revenue. We also generate revenue from riders renting Light Vehicles, drivers renting vehicles through Express Drive, Lyft Rentals renters, Lyft Driver Center and Lyft Auto Care users, and by making our ridesharing marketplace available to organizations through our Lyft Business offerings, such as our Concierge and Corporate Business Travel programs. 
In the second quarter of 2021, we began generating revenues from licensing and data access agreements, primarily with third-party autonomous vehicle companies.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 20, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"chunk_id\": \"f7cdb289-ca94-5e40-909d-7a01a8a5d378\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6394687509853404, \"text\": \"Revenue Recognition\\n\\nThe Company generates its revenue from its multimodal transportation networks that offer access to a variety of transportation options through the Lyft Platform and mobile-based applications. Substantially all of the Company\\u2019s revenue is generated from its ridesharing marketplace that connects drivers and riders and is recognized in accordance with Accounting Standards Codification Topic 606 (\\u201cASC 606\\u201d). In addition, the Company generates revenue in accordance with ASC 606 from licensing and data access, primarily with third-party autonomous vehicle companies. The Company also generates rental revenue from Flexdrive, its network of Light Vehicles and Lyft Rentals, which is recognized in accordance with Accounting Standards Codification Topic 842 (\\u201cASC 842\\u201d).\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 86, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"chunk_id\": \"14c06942-0f82-5a5f-9936-03919f6dac96\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6308834176729083, \"text\": \"Light Vehicle Rider and Lyft Rentals Renter Incentives\\n\\nIncentives offered to Light Vehicle riders and Lyft Rentals renters were not material for the years ended December 31, 2021 and 2020.\\n\\nFor the years ended December 31, 2021, 2020 and 2019, in relation to the driver, rider, Light Vehicle riders and Lyft Rentals renters incentive programs, the Company recorded $1.3 billion, $390.8 million and $560.3 million as a reduction to revenue and $64.7 million, $135.0 million and $381.5 million as sales and marketing expense, respectively.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 611, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 89, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"chunk_id\": \"e0f5bd80-c3d2-58d6-a310-e04fa1618a5a\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6294196468937618, \"text\": \"Software Development Costs\\n\\nThe Company incurs costs related to developing the Lyft Platform and related support systems. 
The Company capitalizes development costs related to the Lyft Platform and related support systems once the preliminary project stage is complete and it is probable that the project will be completed and the software will be used to perform the function intended. The Company capitalized $16.2 million and $12.8 million of software development costs during the year ended December 31, 2021 and 2020, respectively. For the year ended December 31, 2019, capitalized software development costs was not material.\\n\\nInsurance Reserves\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 649, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 94, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"chunk_id\": \"ab302f43-5dcf-5e04-82ee-754565cd1cda\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [\"122fdf6a-e116-546b-a8f6-e4cb2e2c0a09\"], \"score\": 0.6209709459253888, \"text\": \"32.1\\u2020\\n\\nCertifications of Principal Executive Officer and Principal Financial Officer pursuant to 18 U.S.C. Section 1350, as adopted pursuant to Section 906 of the Sarbanes-Oxley Act of 2002.\\n\\n101\\n\\nThe following financial information from Lyft, Inc.\\u2019s Annual Report on Form 10-K for the fiscal year ended December 31, 2021 formatted in Inline XBRL (eXtensible Business Reporting Language): (i) Consolidated Statements of Operations for the fiscal years ended December 31, 2021, 2020 and 2019; (ii) Consolidated Statements of Comprehensive Income (Loss) for the fiscal years ended December 31, 2021, 2020, and 2019; (iii) Consolidated Balance Sheets as of December 31, 2021 and 2020; (iv) Consolidated Statements of Cash Flows for the fiscal years ended December 31, 2021, 2020, and 2019; (v) Consolidated Statements of Redeemable Convertible Preferred Stock and Stockholders\\u2019 Equity for the fiscal years ended December 31, 2021, 2020, and 2019; and (vi) Notes to the Consolidated Financial Statements.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 817, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 127, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}]Lyft's profit in 2020 was not a profit but a net loss. According to the provided context, Lyft reported a net loss of $1,752,857,000 for the year ended December 31, 2020 [2]." 
} } ] diff --git a/py/tests/regression/runner.py b/py/tests/regression/runner.py index 68526d4a0..2e9d577ac 100644 --- a/py/tests/regression/runner.py +++ b/py/tests/regression/runner.py @@ -1,7 +1,6 @@ import argparse import importlib import os -from typing import List from colorama import Fore, Style, init from test_cases.base import BaseTest, RegressionTest @@ -18,7 +17,7 @@ def __init__( base_url: str = "http://localhost:7272", ): self.client = R2RClient(base_url=base_url) - self.tests: List[BaseTest] = [] + self.tests: list[BaseTest] = [] self.test_order = [ "TestDocumentManagement", "TestRetrieval", diff --git a/py/tests/regression/test_cases/base.py b/py/tests/regression/test_cases/base.py index 91555644b..afcf57647 100644 --- a/py/tests/regression/test_cases/base.py +++ b/py/tests/regression/test_cases/base.py @@ -1,9 +1,9 @@ import json import os import re -from typing import Any, Callable, Dict, Optional +from typing import Any, Callable, Optional -from colorama import Fore, Style, init +from colorama import Fore, Style from deepdiff import DeepDiff # TODO: need to import this from the package, not from the local directory @@ -20,7 +20,7 @@ def __init__( self, name: str, test_function: Callable[[R2RClient], Any], - expected_output: Dict[str, Any], + expected_output: dict[str, Any], exclude_paths: list[str] = [], ): self.name = name @@ -36,19 +36,19 @@ def update_expected_output(self, client: R2RClient): result = self._run_test(client) self._save_expected_output(result) - def _run_test(self, client: R2RClient) -> Dict[str, Any]: + def _run_test(self, client: R2RClient) -> dict[str, Any]: return self.test_function(client) - def _load_expected_output(self) -> Dict[str, Any]: + def _load_expected_output(self) -> dict[str, Any]: with open(self.expected_output_file, "r") as f: return json.load(f) - def _save_expected_output(self, output: Dict[str, Any]): + def _save_expected_output(self, output: dict[str, Any]): with open(self.expected_output_file, "w") as f: json.dump(output, f, indent=2) def _compare_output( - self, actual: Dict[str, Any], expected: Dict[str, Any] + self, actual: dict[str, Any], expected: dict[str, Any] ) -> bool: diff = self._custom_diff(expected, actual) if diff: @@ -58,8 +58,8 @@ def _compare_output( return True def _custom_diff( - self, expected: Dict[str, Any], actual: Dict[str, Any] - ) -> Dict[str, Any]: + self, expected: dict[str, Any], actual: dict[str, Any] + ) -> dict[str, Any]: diff = {} expected_results = expected.get("results", {}) @@ -157,7 +157,7 @@ def _serialize_deep_diff(self, deep_diff): else: return str(deep_diff) - def _get_completion_content(self, data: Dict[str, Any]) -> Optional[str]: + def _get_completion_content(self, data: dict[str, Any]) -> Optional[str]: try: return data["completion"]["choices"][0]["message"]["content"] except (KeyError, IndexError): @@ -254,7 +254,7 @@ def update_expected_outputs(self, actual_outputs_dir: str): with open(self.expected_outputs_file, "w") as f: json.dump(actual_outputs, f, indent=2) - def _load_expected_outputs(self) -> Dict[str, Any]: + def _load_expected_outputs(self) -> dict[str, Any]: if os.path.exists(self.expected_outputs_file): with open(self.expected_outputs_file, "r") as f: return json.load(f) @@ -263,11 +263,11 @@ def _load_expected_outputs(self) -> Dict[str, Any]: def set_exclude_paths(self, test_name: str, exclude_paths: list[str] = []): self.exclude_paths_map[_to_snake_case(test_name)] = exclude_paths - def get_test_cases(self) -> Dict[str, callable]: + def get_test_cases(self) -> dict[str, 
callable]: raise NotImplementedError( "Subclasses must implement get_test_cases method" ) - def _load_expected_outputs(self) -> Dict[str, Any]: + def _load_expected_outputs(self) -> dict[str, Any]: with open(self.expected_outputs_file, "r") as f: return json.load(f) diff --git a/py/tests/regression/test_cases/test_document_management.py b/py/tests/regression/test_cases/test_document_management.py index a4a44e048..0c7bee8c3 100644 --- a/py/tests/regression/test_cases/test_document_management.py +++ b/py/tests/regression/test_cases/test_document_management.py @@ -83,7 +83,7 @@ def documents_overview_test(self, client): def document_chunks_test(self, client): try: # Now delete the file - chunks_response = client.document_chunks( + chunks_response = client.list_document_chunks( TestDocumentManagement.CHUNKS_FILE_ID ) return chunks_response diff --git a/services/unstructured/main.py b/services/unstructured/main.py index e1040dbd5..02f75583b 100644 --- a/services/unstructured/main.py +++ b/services/unstructured/main.py @@ -4,7 +4,7 @@ import logging import os from io import BytesIO -from typing import Dict, List, Optional +from typing import Optional from fastapi import FastAPI, HTTPException from pydantic import BaseModel @@ -17,12 +17,12 @@ class PartitionRequestModel(BaseModel): file_content: bytes - ingestion_config: Dict + ingestion_config: dict filename: Optional[str] = None class PartitionResponseModel(BaseModel): - elements: List[Dict] + elements: list[dict] executor = concurrent.futures.ThreadPoolExecutor( @@ -30,7 +30,7 @@ class PartitionResponseModel(BaseModel): ) -def run_partition(file_content: str, filename: str, ingestion_config: Dict) -> List[Dict]: +def run_partition(file_content: str, filename: str, ingestion_config: dict) -> list[dict]: file_content_bytes = base64.b64decode(file_content) file_io = BytesIO(file_content_bytes) elements = partition(file=file_io, file_filename=filename, **ingestion_config) diff --git a/templates/README.md b/templates/README.md deleted file mode 100644 index 1d89fef3b..000000000 --- a/templates/README.md +++ /dev/null @@ -1,9 +0,0 @@ -## R2R Templates - -### R2R templates are in beta! We value your feedback and contributions to make them more widely accessible. - -A collection of templates curated by the SciPhi team and our community to make it easy for you to get started with your RAG application. - -[Search for templates and preview live deployments.](https://app.sciphi.ai/templates) - -Interested in submitting a template? Make a PR to introduce yours! diff --git a/templates/agentic_rag_chatbot/README.md b/templates/agentic_rag_chatbot/README.md deleted file mode 100644 index bf4424997..000000000 --- a/templates/agentic_rag_chatbot/README.md +++ /dev/null @@ -1,99 +0,0 @@ -## Agentic RAG Chatbot - -### R2R templates are in beta! We value your feedback and contributions to make them more widely accessible. - -**Framework:** Python, Next.js - -A boilerplate chatbot that uses the R2R Python SDK to connect to an R2R server. This template offers a simple and clean interface for users to interact with the chatbot. - -### [Preview a live demo of this template:](https://agentic-chatbot.vercel.app/) - - Agentic RAG Chatbot Image - - -### Deploying -Using the R2R CLI we can clone the template. First, we install the R2R CLI, followed by the clone command for this template: -```bash -pip install r2r - -r2r clone agentic_rag_chatbot -``` - -### Starting your R2R server -We'll need to connect our templates to a running R2R server.
You can deploy an R2R pipeline instantly with [SciPhi Cloud](https://app.sciphi.ai/) or you can [learn how to deploy a pipeline yourself here.](https://r2r-docs.sciphi.ai/documentation/installation) - -Once our R2R server is up and running, we can navigate to the template directory, where we'll see a python back end and a Next.js front end. - -```bash -cd agentic_rag_chatbot - -cd python-backend -``` - -If we inspect the `r2r_ingestion.py` file, it shows us that we'll use the R2R Python SDK to ingest a number of sample files about RAG. These file paths can be changed. - -```python -import os -import time - -from r2r import R2RClient - -# Our R2R base URL is the URL of our SciPhi deployed R2R server -deployment_url = os.getenv("R2R_DEPLOYMENT_URL") -client = R2RClient(deployment_url) -======= -### by SciPhi - -[!IMPORTANT] -R2R templates are in beta! We value your feedback and contributions to make them more widely accessible. - -| Framework | Python, Next.js | -|-----------|-----------------| -| Use Case | AI, RAG | - - -A boilerplate chatbot that uses the R2R Python SDK to connect to an R2R server. This template offers a simple and clean interface for users to interact with the chatbot. - -### Deploying - -First, we can create a Python backend to ingest our data: - -```python -from r2r import R2RClient -import time - -# Our R2R base URL is the URL of our SciPhi deployed R2R server -client = R2RClient("YOUR_SCIPHI_DEPLOYMENT_URL") - - -# We'll make sure that we can connect to the server -health_response = client.health() -print(health_response) - -# We'll ingest the data from the data folder -file_paths = ["../web-app/public/data"] -t0 = time.time() -ingest_response = client.ingest_files( - file_paths=file_paths, -) -t1 = time.time() -print(ingest_response) -print(f"Time taken to ingest: {t1 - t0} seconds") -``` - -Additionally, we see that we need to specify the URL at which our R2R server is deployed. -We can set our R2R deployment URL, and run this file to ingest our sample data: -```bash -export R2R_DEPLOYMENT_URL= - -python r2r_ingestion.py -``` - -Then, we can launch our front end. This can be done locally, or through a serverless provider such as Vercel. - -```bash -cd ../web-app -export NEXT_PUBLIC_DEFAULT_AGENT_URL=$R2R_DEPLOYMENT_URL -npm run build -npm run start -``` diff --git a/templates/agentic_rag_chatbot/agentic_rag_chatbot.png b/templates/agentic_rag_chatbot/agentic_rag_chatbot.png deleted file mode 100644 index da5cf917d..000000000 Binary files a/templates/agentic_rag_chatbot/agentic_rag_chatbot.png and /dev/null differ diff --git a/templates/agentic_rag_chatbot/python-backend/poetry.lock b/templates/agentic_rag_chatbot/python-backend/poetry.lock deleted file mode 100644 index 57cb671f9..000000000 --- a/templates/agentic_rag_chatbot/python-backend/poetry.lock +++ /dev/null @@ -1,664 +0,0 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
- -[[package]] -name = "annotated-types" -version = "0.7.0" -description = "Reusable constraint types to use with typing.Annotated" -optional = false -python-versions = ">=3.8" -files = [ - {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, - {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, -] - -[[package]] -name = "anyio" -version = "4.4.0" -description = "High level compatibility layer for multiple asynchronous event loop implementations" -optional = false -python-versions = ">=3.8" -files = [ - {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, - {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"}, -] - -[package.dependencies] -idna = ">=2.8" -sniffio = ">=1.1" - -[package.extras] -doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] -trio = ["trio (>=0.23)"] - -[[package]] -name = "certifi" -version = "2024.7.4" -description = "Python package for providing Mozilla's CA Bundle." -optional = false -python-versions = ">=3.6" -files = [ - {file = "certifi-2024.7.4-py3-none-any.whl", hash = "sha256:c198e21b1289c2ab85ee4e67bb4b4ef3ead0892059901a8d5b622f24a1101e90"}, - {file = "certifi-2024.7.4.tar.gz", hash = "sha256:5a1e7645bc0ec61a09e26c36f6106dd4cf40c6db3a1fb6352b0244e7fb057c7b"}, -] - -[[package]] -name = "charset-normalizer" -version = "3.3.2" -description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
-optional = false -python-versions = ">=3.7.0" -files = [ - {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, - {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, - {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, - {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, - {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, - {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, - {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, - {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, - {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, - {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, -] - -[[package]] -name = "click" -version = "8.1.7" -description = "Composable command line interface toolkit" -optional = false -python-versions = ">=3.7" -files = [ - {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, - {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[[package]] -name = "colorama" -version = "0.4.6" -description = "Cross-platform colored terminal text." -optional = false -python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" -files = [ - {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, - {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, -] - -[[package]] -name = "fastapi" -version = "0.109.2" -description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" -optional = false -python-versions = ">=3.8" -files = [ - {file = "fastapi-0.109.2-py3-none-any.whl", hash = "sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d"}, - {file = "fastapi-0.109.2.tar.gz", hash = "sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73"}, -] - -[package.dependencies] -pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" -starlette = ">=0.36.3,<0.37.0" -typing-extensions = ">=4.8.0" - -[package.extras] -all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] - -[[package]] -name = "h11" -version = "0.14.0" -description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" -optional = false -python-versions = ">=3.7" -files = [ - {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, - {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, -] - -[[package]] -name = "httpcore" -version = "1.0.5" -description = "A minimal low-level HTTP client." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, - {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, -] - -[package.dependencies] -certifi = "*" -h11 = ">=0.13,<0.15" - -[package.extras] -asyncio = ["anyio (>=4.0,<5.0)"] -http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] -trio = ["trio (>=0.22.0,<0.26.0)"] - -[[package]] -name = "httpx" -version = "0.27.2" -description = "The next generation HTTP client." -optional = false -python-versions = ">=3.8" -files = [ - {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, - {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, -] - -[package.dependencies] -anyio = "*" -certifi = "*" -httpcore = "==1.*" -idna = "*" -sniffio = "*" - -[package.extras] -brotli = ["brotli", "brotlicffi"] -cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] -http2 = ["h2 (>=3,<5)"] -socks = ["socksio (==1.*)"] -zstd = ["zstandard (>=0.18.0)"] - -[[package]] -name = "idna" -version = "3.8" -description = "Internationalized Domain Names in Applications (IDNA)" -optional = false -python-versions = ">=3.6" -files = [ - {file = "idna-3.8-py3-none-any.whl", hash = "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac"}, - {file = "idna-3.8.tar.gz", hash = "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603"}, -] - -[[package]] -name = "joblib" -version = "1.4.2" -description = "Lightweight pipelining with Python functions" -optional = false -python-versions = ">=3.8" -files = [ - {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, - {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, -] - -[[package]] -name = "nest-asyncio" -version = "1.6.0" -description = "Patch asyncio to allow nested event loops" -optional = false -python-versions = ">=3.5" -files = [ - {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, - {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, -] - -[[package]] -name = "nltk" -version = "3.9.1" -description = "Natural Language Toolkit" -optional = false -python-versions = ">=3.8" -files = [ - {file = "nltk-3.9.1-py3-none-any.whl", hash = "sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"}, - {file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"}, -] - -[package.dependencies] -click = "*" -joblib = "*" -regex = ">=2021.8.3" -tqdm = "*" - -[package.extras] -all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] -corenlp = ["requests"] -machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] -plot = ["matplotlib"] -tgrep = ["pyparsing"] -twitter = ["twython"] - -[[package]] -name = "pydantic" -version = "2.8.2" -description = "Data validation using Python type hints" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pydantic-2.8.2-py3-none-any.whl", hash = "sha256:73ee9fddd406dc318b885c7a2eab8a6472b68b8fb5ba8150949fc3db939f23c8"}, - {file = 
"pydantic-2.8.2.tar.gz", hash = "sha256:6f62c13d067b0755ad1c21a34bdd06c0c12625a22b0fc09c6b149816604f7c2a"}, -] - -[package.dependencies] -annotated-types = ">=0.4.0" -pydantic-core = "2.20.1" -typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} - -[package.extras] -email = ["email-validator (>=2.0.0)"] - -[[package]] -name = "pydantic-core" -version = "2.20.1" -description = "Core functionality for Pydantic validation and serialization" -optional = false -python-versions = ">=3.8" -files = [ - {file = "pydantic_core-2.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3acae97ffd19bf091c72df4d726d552c473f3576409b2a7ca36b2f535ffff4a3"}, - {file = "pydantic_core-2.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:41f4c96227a67a013e7de5ff8f20fb496ce573893b7f4f2707d065907bffdbd6"}, - {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f239eb799a2081495ea659d8d4a43a8f42cd1fe9ff2e7e436295c38a10c286a"}, - {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:53e431da3fc53360db73eedf6f7124d1076e1b4ee4276b36fb25514544ceb4a3"}, - {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1f62b2413c3a0e846c3b838b2ecd6c7a19ec6793b2a522745b0869e37ab5bc1"}, - {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5d41e6daee2813ecceea8eda38062d69e280b39df793f5a942fa515b8ed67953"}, - {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d482efec8b7dc6bfaedc0f166b2ce349df0011f5d2f1f25537ced4cfc34fd98"}, - {file = "pydantic_core-2.20.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e93e1a4b4b33daed65d781a57a522ff153dcf748dee70b40c7258c5861e1768a"}, - {file = "pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e7c4ea22b6739b162c9ecaaa41d718dfad48a244909fe7ef4b54c0b530effc5a"}, - {file = "pydantic_core-2.20.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4f2790949cf385d985a31984907fecb3896999329103df4e4983a4a41e13e840"}, - {file = "pydantic_core-2.20.1-cp310-none-win32.whl", hash = "sha256:5e999ba8dd90e93d57410c5e67ebb67ffcaadcea0ad973240fdfd3a135506250"}, - {file = "pydantic_core-2.20.1-cp310-none-win_amd64.whl", hash = "sha256:512ecfbefef6dac7bc5eaaf46177b2de58cdf7acac8793fe033b24ece0b9566c"}, - {file = "pydantic_core-2.20.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:d2a8fa9d6d6f891f3deec72f5cc668e6f66b188ab14bb1ab52422fe8e644f312"}, - {file = "pydantic_core-2.20.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:175873691124f3d0da55aeea1d90660a6ea7a3cfea137c38afa0a5ffabe37b88"}, - {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37eee5b638f0e0dcd18d21f59b679686bbd18917b87db0193ae36f9c23c355fc"}, - {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:25e9185e2d06c16ee438ed39bf62935ec436474a6ac4f9358524220f1b236e43"}, - {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:150906b40ff188a3260cbee25380e7494ee85048584998c1e66df0c7a11c17a6"}, - {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8ad4aeb3e9a97286573c03df758fc7627aecdd02f1da04516a86dc159bf70121"}, - {file = 
"pydantic_core-2.20.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d3f3ed29cd9f978c604708511a1f9c2fdcb6c38b9aae36a51905b8811ee5cbf1"}, - {file = "pydantic_core-2.20.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b0dae11d8f5ded51699c74d9548dcc5938e0804cc8298ec0aa0da95c21fff57b"}, - {file = "pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:faa6b09ee09433b87992fb5a2859efd1c264ddc37280d2dd5db502126d0e7f27"}, - {file = "pydantic_core-2.20.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:9dc1b507c12eb0481d071f3c1808f0529ad41dc415d0ca11f7ebfc666e66a18b"}, - {file = "pydantic_core-2.20.1-cp311-none-win32.whl", hash = "sha256:fa2fddcb7107e0d1808086ca306dcade7df60a13a6c347a7acf1ec139aa6789a"}, - {file = "pydantic_core-2.20.1-cp311-none-win_amd64.whl", hash = "sha256:40a783fb7ee353c50bd3853e626f15677ea527ae556429453685ae32280c19c2"}, - {file = "pydantic_core-2.20.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:595ba5be69b35777474fa07f80fc260ea71255656191adb22a8c53aba4479231"}, - {file = "pydantic_core-2.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a4f55095ad087474999ee28d3398bae183a66be4823f753cd7d67dd0153427c9"}, - {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9aa05d09ecf4c75157197f27cdc9cfaeb7c5f15021c6373932bf3e124af029f"}, - {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e97fdf088d4b31ff4ba35db26d9cc472ac7ef4a2ff2badeabf8d727b3377fc52"}, - {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bc633a9fe1eb87e250b5c57d389cf28998e4292336926b0b6cdaee353f89a237"}, - {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d573faf8eb7e6b1cbbcb4f5b247c60ca8be39fe2c674495df0eb4318303137fe"}, - {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26dc97754b57d2fd00ac2b24dfa341abffc380b823211994c4efac7f13b9e90e"}, - {file = "pydantic_core-2.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:33499e85e739a4b60c9dac710c20a08dc73cb3240c9a0e22325e671b27b70d24"}, - {file = "pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:bebb4d6715c814597f85297c332297c6ce81e29436125ca59d1159b07f423eb1"}, - {file = "pydantic_core-2.20.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:516d9227919612425c8ef1c9b869bbbee249bc91912c8aaffb66116c0b447ebd"}, - {file = "pydantic_core-2.20.1-cp312-none-win32.whl", hash = "sha256:469f29f9093c9d834432034d33f5fe45699e664f12a13bf38c04967ce233d688"}, - {file = "pydantic_core-2.20.1-cp312-none-win_amd64.whl", hash = "sha256:035ede2e16da7281041f0e626459bcae33ed998cca6a0a007a5ebb73414ac72d"}, - {file = "pydantic_core-2.20.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:0827505a5c87e8aa285dc31e9ec7f4a17c81a813d45f70b1d9164e03a813a686"}, - {file = "pydantic_core-2.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:19c0fa39fa154e7e0b7f82f88ef85faa2a4c23cc65aae2f5aea625e3c13c735a"}, - {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4aa223cd1e36b642092c326d694d8bf59b71ddddc94cdb752bbbb1c5c91d833b"}, - {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c336a6d235522a62fef872c6295a42ecb0c4e1d0f1a3e500fe949415761b8a19"}, - {file = 
"pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7eb6a0587eded33aeefea9f916899d42b1799b7b14b8f8ff2753c0ac1741edac"}, - {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:70c8daf4faca8da5a6d655f9af86faf6ec2e1768f4b8b9d0226c02f3d6209703"}, - {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e9fa4c9bf273ca41f940bceb86922a7667cd5bf90e95dbb157cbb8441008482c"}, - {file = "pydantic_core-2.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:11b71d67b4725e7e2a9f6e9c0ac1239bbc0c48cce3dc59f98635efc57d6dac83"}, - {file = "pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:270755f15174fb983890c49881e93f8f1b80f0b5e3a3cc1394a255706cabd203"}, - {file = "pydantic_core-2.20.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c81131869240e3e568916ef4c307f8b99583efaa60a8112ef27a366eefba8ef0"}, - {file = "pydantic_core-2.20.1-cp313-none-win32.whl", hash = "sha256:b91ced227c41aa29c672814f50dbb05ec93536abf8f43cd14ec9521ea09afe4e"}, - {file = "pydantic_core-2.20.1-cp313-none-win_amd64.whl", hash = "sha256:65db0f2eefcaad1a3950f498aabb4875c8890438bc80b19362cf633b87a8ab20"}, - {file = "pydantic_core-2.20.1-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:4745f4ac52cc6686390c40eaa01d48b18997cb130833154801a442323cc78f91"}, - {file = "pydantic_core-2.20.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a8ad4c766d3f33ba8fd692f9aa297c9058970530a32c728a2c4bfd2616d3358b"}, - {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:41e81317dd6a0127cabce83c0c9c3fbecceae981c8391e6f1dec88a77c8a569a"}, - {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04024d270cf63f586ad41fff13fde4311c4fc13ea74676962c876d9577bcc78f"}, - {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eaad4ff2de1c3823fddf82f41121bdf453d922e9a238642b1dedb33c4e4f98ad"}, - {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:26ab812fa0c845df815e506be30337e2df27e88399b985d0bb4e3ecfe72df31c"}, - {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3c5ebac750d9d5f2706654c638c041635c385596caf68f81342011ddfa1e5598"}, - {file = "pydantic_core-2.20.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2aafc5a503855ea5885559eae883978c9b6d8c8993d67766ee73d82e841300dd"}, - {file = "pydantic_core-2.20.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:4868f6bd7c9d98904b748a2653031fc9c2f85b6237009d475b1008bfaeb0a5aa"}, - {file = "pydantic_core-2.20.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:aa2f457b4af386254372dfa78a2eda2563680d982422641a85f271c859df1987"}, - {file = "pydantic_core-2.20.1-cp38-none-win32.whl", hash = "sha256:225b67a1f6d602de0ce7f6c1c3ae89a4aa25d3de9be857999e9124f15dab486a"}, - {file = "pydantic_core-2.20.1-cp38-none-win_amd64.whl", hash = "sha256:6b507132dcfc0dea440cce23ee2182c0ce7aba7054576efc65634f080dbe9434"}, - {file = "pydantic_core-2.20.1-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:b03f7941783b4c4a26051846dea594628b38f6940a2fdc0df00b221aed39314c"}, - {file = "pydantic_core-2.20.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1eedfeb6089ed3fad42e81a67755846ad4dcc14d73698c120a82e4ccf0f1f9f6"}, - {file = 
"pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:635fee4e041ab9c479e31edda27fcf966ea9614fff1317e280d99eb3e5ab6fe2"}, - {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:77bf3ac639c1ff567ae3b47f8d4cc3dc20f9966a2a6dd2311dcc055d3d04fb8a"}, - {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ed1b0132f24beeec5a78b67d9388656d03e6a7c837394f99257e2d55b461611"}, - {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c6514f963b023aeee506678a1cf821fe31159b925c4b76fe2afa94cc70b3222b"}, - {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:10d4204d8ca33146e761c79f83cc861df20e7ae9f6487ca290a97702daf56006"}, - {file = "pydantic_core-2.20.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2d036c7187b9422ae5b262badb87a20a49eb6c5238b2004e96d4da1231badef1"}, - {file = "pydantic_core-2.20.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:9ebfef07dbe1d93efb94b4700f2d278494e9162565a54f124c404a5656d7ff09"}, - {file = "pydantic_core-2.20.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:6b9d9bb600328a1ce523ab4f454859e9d439150abb0906c5a1983c146580ebab"}, - {file = "pydantic_core-2.20.1-cp39-none-win32.whl", hash = "sha256:784c1214cb6dd1e3b15dd8b91b9a53852aed16671cc3fbe4786f4f1db07089e2"}, - {file = "pydantic_core-2.20.1-cp39-none-win_amd64.whl", hash = "sha256:d2fe69c5434391727efa54b47a1e7986bb0186e72a41b203df8f5b0a19a4f669"}, - {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a45f84b09ac9c3d35dfcf6a27fd0634d30d183205230a0ebe8373a0e8cfa0906"}, - {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:d02a72df14dfdbaf228424573a07af10637bd490f0901cee872c4f434a735b94"}, - {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d2b27e6af28f07e2f195552b37d7d66b150adbaa39a6d327766ffd695799780f"}, - {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:084659fac3c83fd674596612aeff6041a18402f1e1bc19ca39e417d554468482"}, - {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:242b8feb3c493ab78be289c034a1f659e8826e2233786e36f2893a950a719bb6"}, - {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:38cf1c40a921d05c5edc61a785c0ddb4bed67827069f535d794ce6bcded919fc"}, - {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e0bbdd76ce9aa5d4209d65f2b27fc6e5ef1312ae6c5333c26db3f5ade53a1e99"}, - {file = "pydantic_core-2.20.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:254ec27fdb5b1ee60684f91683be95e5133c994cc54e86a0b0963afa25c8f8a6"}, - {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:407653af5617f0757261ae249d3fba09504d7a71ab36ac057c938572d1bc9331"}, - {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:c693e916709c2465b02ca0ad7b387c4f8423d1db7b4649c551f27a529181c5ad"}, - {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5b5ff4911aea936a47d9376fd3ab17e970cc543d1b68921886e7f64bd28308d1"}, - {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:177f55a886d74f1808763976ac4efd29b7ed15c69f4d838bbd74d9d09cf6fa86"}, - {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:964faa8a861d2664f0c7ab0c181af0bea66098b1919439815ca8803ef136fc4e"}, - {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:4dd484681c15e6b9a977c785a345d3e378d72678fd5f1f3c0509608da24f2ac0"}, - {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f6d6cff3538391e8486a431569b77921adfcdef14eb18fbf19b7c0a5294d4e6a"}, - {file = "pydantic_core-2.20.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a6d511cc297ff0883bc3708b465ff82d7560193169a8b93260f74ecb0a5e08a7"}, - {file = "pydantic_core-2.20.1.tar.gz", hash = "sha256:26ca695eeee5f9f1aeeb211ffc12f10bcb6f71e2989988fda61dabd65db878d4"}, -] - -[package.dependencies] -typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" - -[[package]] -name = "python-dotenv" -version = "1.0.1" -description = "Read key-value pairs from a .env file and set them as environment variables" -optional = false -python-versions = ">=3.8" -files = [ - {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, - {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, -] - -[package.extras] -cli = ["click (>=5.0)"] - -[[package]] -name = "r2r" -version = "3.0.7" -description = "SciPhi R2R" -optional = false -python-versions = "<3.13,>=3.9" -files = [ - {file = "r2r-3.0.7-py3-none-any.whl", hash = "sha256:790cc0854660975fe63926767225d1e577da203dde6479dd88dbb64d365666e5"}, - {file = "r2r-3.0.7.tar.gz", hash = "sha256:ee85f243d608a61ee4fccb88798681bd836fd1d94990581412f6f8ea5f6b3681"}, -] - -[package.dependencies] -click = ">=8.0.0,<9.0.0" -fastapi = ">=0.109.2,<0.110.0" -httpx = ">=0.27.0,<0.28.0" -nest-asyncio = ">=1.6.0,<2.0.0" -nltk = ">=3.9.1,<4.0.0" -python-dotenv = ">=1.0.1,<2.0.0" -requests = ">=2.31.0,<3.0.0" -types-requests = ">=2.31.0,<3.0.0" - -[package.extras] -core = ["aiosqlite (>=0.20.0,<0.21.0)", "asyncpg (>=0.29.0,<0.30.0)", "bcrypt (>=4.1.3,<5.0.0)", "beautifulsoup4 (>=4.12.3,<5.0.0)", "deepdiff (>=7.0.1,<8.0.0)", "fire (>=0.5.0,<0.6.0)", "fsspec (>=2024.6.0,<2025.0.0)", "graspologic (>=3.4.1,<4.0.0)", "gunicorn (>=21.2.0,<22.0.0)", "litellm (>=1.42.3,<2.0.0)", "markdown (>=3.6,<4.0)", "neo4j (>=5.21.0,<6.0.0)", "ollama (>=0.3.1,<0.4.0)", "openai (>=1.11.1,<2.0.0)", "openpyxl (>=3.1.2,<4.0.0)", "passlib (>=1.7.4,<2.0.0)", "poppler-utils (>=0.1.0,<0.2.0)", "posthog (>=3.5.0,<4.0.0)", "psutil (>=6.0.0,<7.0.0)", "pydantic[email] (>=2.8.2,<3.0.0)", "pyjwt (>=2.8.0,<3.0.0)", "pypdf (>=4.2.0,<5.0.0)", "python-docx (>=1.1.0,<2.0.0)", "python-multipart (>=0.0.9,<0.0.10)", "python-pptx (>=1.0.1,<2.0.0)", "pyyaml (>=6.0.1,<7.0.0)", "redis (>=5.0.4,<6.0.0)", "sqlalchemy (>=2.0.30,<3.0.0)", "toml (>=0.10.2,<0.11.0)", "uvicorn (>=0.27.0.post1,<0.28.0)", "vecs (>=0.4.0,<0.5.0)"] -core-ingest-movies = ["moviepy (>=1.0.3,<2.0.0)", "opencv-python (>=4.10.0.82,<5.0.0.0)"] - -[[package]] -name = "regex" -version = "2024.7.24" -description = "Alternative regular expression module, to replace re." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "regex-2024.7.24-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:228b0d3f567fafa0633aee87f08b9276c7062da9616931382993c03808bb68ce"}, - {file = "regex-2024.7.24-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3426de3b91d1bc73249042742f45c2148803c111d1175b283270177fdf669024"}, - {file = "regex-2024.7.24-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f273674b445bcb6e4409bf8d1be67bc4b58e8b46fd0d560055d515b8830063cd"}, - {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23acc72f0f4e1a9e6e9843d6328177ae3074b4182167e34119ec7233dfeccf53"}, - {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65fd3d2e228cae024c411c5ccdffae4c315271eee4a8b839291f84f796b34eca"}, - {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c414cbda77dbf13c3bc88b073a1a9f375c7b0cb5e115e15d4b73ec3a2fbc6f59"}, - {file = "regex-2024.7.24-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf7a89eef64b5455835f5ed30254ec19bf41f7541cd94f266ab7cbd463f00c41"}, - {file = "regex-2024.7.24-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:19c65b00d42804e3fbea9708f0937d157e53429a39b7c61253ff15670ff62cb5"}, - {file = "regex-2024.7.24-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7a5486ca56c8869070a966321d5ab416ff0f83f30e0e2da1ab48815c8d165d46"}, - {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6f51f9556785e5a203713f5efd9c085b4a45aecd2a42573e2b5041881b588d1f"}, - {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:a4997716674d36a82eab3e86f8fa77080a5d8d96a389a61ea1d0e3a94a582cf7"}, - {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:c0abb5e4e8ce71a61d9446040c1e86d4e6d23f9097275c5bd49ed978755ff0fe"}, - {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:18300a1d78cf1290fa583cd8b7cde26ecb73e9f5916690cf9d42de569c89b1ce"}, - {file = "regex-2024.7.24-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:416c0e4f56308f34cdb18c3f59849479dde5b19febdcd6e6fa4d04b6c31c9faa"}, - {file = "regex-2024.7.24-cp310-cp310-win32.whl", hash = "sha256:fb168b5924bef397b5ba13aabd8cf5df7d3d93f10218d7b925e360d436863f66"}, - {file = "regex-2024.7.24-cp310-cp310-win_amd64.whl", hash = "sha256:6b9fc7e9cc983e75e2518496ba1afc524227c163e43d706688a6bb9eca41617e"}, - {file = "regex-2024.7.24-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:382281306e3adaaa7b8b9ebbb3ffb43358a7bbf585fa93821300a418bb975281"}, - {file = "regex-2024.7.24-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4fdd1384619f406ad9037fe6b6eaa3de2749e2e12084abc80169e8e075377d3b"}, - {file = "regex-2024.7.24-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3d974d24edb231446f708c455fd08f94c41c1ff4f04bcf06e5f36df5ef50b95a"}, - {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2ec4419a3fe6cf8a4795752596dfe0adb4aea40d3683a132bae9c30b81e8d73"}, - {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb563dd3aea54c797adf513eeec819c4213d7dbfc311874eb4fd28d10f2ff0f2"}, - {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:45104baae8b9f67569f0f1dca5e1f1ed77a54ae1cd8b0b07aba89272710db61e"}, - {file = "regex-2024.7.24-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:994448ee01864501912abf2bad9203bffc34158e80fe8bfb5b031f4f8e16da51"}, - {file = "regex-2024.7.24-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3fac296f99283ac232d8125be932c5cd7644084a30748fda013028c815ba3364"}, - {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:7e37e809b9303ec3a179085415cb5f418ecf65ec98cdfe34f6a078b46ef823ee"}, - {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:01b689e887f612610c869421241e075c02f2e3d1ae93a037cb14f88ab6a8934c"}, - {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:f6442f0f0ff81775eaa5b05af8a0ffa1dda36e9cf6ec1e0d3d245e8564b684ce"}, - {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:871e3ab2838fbcb4e0865a6e01233975df3a15e6fce93b6f99d75cacbd9862d1"}, - {file = "regex-2024.7.24-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c918b7a1e26b4ab40409820ddccc5d49871a82329640f5005f73572d5eaa9b5e"}, - {file = "regex-2024.7.24-cp311-cp311-win32.whl", hash = "sha256:2dfbb8baf8ba2c2b9aa2807f44ed272f0913eeeba002478c4577b8d29cde215c"}, - {file = "regex-2024.7.24-cp311-cp311-win_amd64.whl", hash = "sha256:538d30cd96ed7d1416d3956f94d54e426a8daf7c14527f6e0d6d425fcb4cca52"}, - {file = "regex-2024.7.24-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:fe4ebef608553aff8deb845c7f4f1d0740ff76fa672c011cc0bacb2a00fbde86"}, - {file = "regex-2024.7.24-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:74007a5b25b7a678459f06559504f1eec2f0f17bca218c9d56f6a0a12bfffdad"}, - {file = "regex-2024.7.24-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7df9ea48641da022c2a3c9c641650cd09f0cd15e8908bf931ad538f5ca7919c9"}, - {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6a1141a1dcc32904c47f6846b040275c6e5de0bf73f17d7a409035d55b76f289"}, - {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:80c811cfcb5c331237d9bad3bea2c391114588cf4131707e84d9493064d267f9"}, - {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7214477bf9bd195894cf24005b1e7b496f46833337b5dedb7b2a6e33f66d962c"}, - {file = "regex-2024.7.24-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d55588cba7553f0b6ec33130bc3e114b355570b45785cebdc9daed8c637dd440"}, - {file = "regex-2024.7.24-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:558a57cfc32adcf19d3f791f62b5ff564922942e389e3cfdb538a23d65a6b610"}, - {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a512eed9dfd4117110b1881ba9a59b31433caed0c4101b361f768e7bcbaf93c5"}, - {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:86b17ba823ea76256b1885652e3a141a99a5c4422f4a869189db328321b73799"}, - {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5eefee9bfe23f6df09ffb6dfb23809f4d74a78acef004aa904dc7c88b9944b05"}, - {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:731fcd76bbdbf225e2eb85b7c38da9633ad3073822f5ab32379381e8c3c12e94"}, - {file = "regex-2024.7.24-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:eaef80eac3b4cfbdd6de53c6e108b4c534c21ae055d1dbea2de6b3b8ff3def38"}, - {file = 
"regex-2024.7.24-cp312-cp312-win32.whl", hash = "sha256:185e029368d6f89f36e526764cf12bf8d6f0e3a2a7737da625a76f594bdfcbfc"}, - {file = "regex-2024.7.24-cp312-cp312-win_amd64.whl", hash = "sha256:2f1baff13cc2521bea83ab2528e7a80cbe0ebb2c6f0bfad15be7da3aed443908"}, - {file = "regex-2024.7.24-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:66b4c0731a5c81921e938dcf1a88e978264e26e6ac4ec96a4d21ae0354581ae0"}, - {file = "regex-2024.7.24-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:88ecc3afd7e776967fa16c80f974cb79399ee8dc6c96423321d6f7d4b881c92b"}, - {file = "regex-2024.7.24-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:64bd50cf16bcc54b274e20235bf8edbb64184a30e1e53873ff8d444e7ac656b2"}, - {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eb462f0e346fcf41a901a126b50f8781e9a474d3927930f3490f38a6e73b6950"}, - {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a82465ebbc9b1c5c50738536fdfa7cab639a261a99b469c9d4c7dcbb2b3f1e57"}, - {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:68a8f8c046c6466ac61a36b65bb2395c74451df2ffb8458492ef49900efed293"}, - {file = "regex-2024.7.24-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dac8e84fff5d27420f3c1e879ce9929108e873667ec87e0c8eeb413a5311adfe"}, - {file = "regex-2024.7.24-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba2537ef2163db9e6ccdbeb6f6424282ae4dea43177402152c67ef869cf3978b"}, - {file = "regex-2024.7.24-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:43affe33137fcd679bdae93fb25924979517e011f9dea99163f80b82eadc7e53"}, - {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:c9bb87fdf2ab2370f21e4d5636e5317775e5d51ff32ebff2cf389f71b9b13750"}, - {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:945352286a541406f99b2655c973852da7911b3f4264e010218bbc1cc73168f2"}, - {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_ppc64le.whl", hash = "sha256:8bc593dcce679206b60a538c302d03c29b18e3d862609317cb560e18b66d10cf"}, - {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_s390x.whl", hash = "sha256:3f3b6ca8eae6d6c75a6cff525c8530c60e909a71a15e1b731723233331de4169"}, - {file = "regex-2024.7.24-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c51edc3541e11fbe83f0c4d9412ef6c79f664a3745fab261457e84465ec9d5a8"}, - {file = "regex-2024.7.24-cp38-cp38-win32.whl", hash = "sha256:d0a07763776188b4db4c9c7fb1b8c494049f84659bb387b71c73bbc07f189e96"}, - {file = "regex-2024.7.24-cp38-cp38-win_amd64.whl", hash = "sha256:8fd5afd101dcf86a270d254364e0e8dddedebe6bd1ab9d5f732f274fa00499a5"}, - {file = "regex-2024.7.24-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0ffe3f9d430cd37d8fa5632ff6fb36d5b24818c5c986893063b4e5bdb84cdf24"}, - {file = "regex-2024.7.24-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:25419b70ba00a16abc90ee5fce061228206173231f004437730b67ac77323f0d"}, - {file = "regex-2024.7.24-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:33e2614a7ce627f0cdf2ad104797d1f68342d967de3695678c0cb84f530709f8"}, - {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d33a0021893ede5969876052796165bab6006559ab845fd7b515a30abdd990dc"}, - {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:04ce29e2c5fedf296b1a1b0acc1724ba93a36fb14031f3abfb7abda2806c1535"}, - {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b16582783f44fbca6fcf46f61347340c787d7530d88b4d590a397a47583f31dd"}, - {file = "regex-2024.7.24-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:836d3cc225b3e8a943d0b02633fb2f28a66e281290302a79df0e1eaa984ff7c1"}, - {file = "regex-2024.7.24-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:438d9f0f4bc64e8dea78274caa5af971ceff0f8771e1a2333620969936ba10be"}, - {file = "regex-2024.7.24-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:973335b1624859cb0e52f96062a28aa18f3a5fc77a96e4a3d6d76e29811a0e6e"}, - {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c5e69fd3eb0b409432b537fe3c6f44ac089c458ab6b78dcec14478422879ec5f"}, - {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:fbf8c2f00904eaf63ff37718eb13acf8e178cb940520e47b2f05027f5bb34ce3"}, - {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:ae2757ace61bc4061b69af19e4689fa4416e1a04840f33b441034202b5cd02d4"}, - {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:44fc61b99035fd9b3b9453f1713234e5a7c92a04f3577252b45feefe1b327759"}, - {file = "regex-2024.7.24-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:84c312cdf839e8b579f504afcd7b65f35d60b6285d892b19adea16355e8343c9"}, - {file = "regex-2024.7.24-cp39-cp39-win32.whl", hash = "sha256:ca5b2028c2f7af4e13fb9fc29b28d0ce767c38c7facdf64f6c2cd040413055f1"}, - {file = "regex-2024.7.24-cp39-cp39-win_amd64.whl", hash = "sha256:7c479f5ae937ec9985ecaf42e2e10631551d909f203e31308c12d703922742f9"}, - {file = "regex-2024.7.24.tar.gz", hash = "sha256:9cfd009eed1a46b27c14039ad5bbc5e71b6367c5b2e6d5f5da0ea91600817506"}, -] - -[[package]] -name = "requests" -version = "2.32.3" -description = "Python HTTP for Humans." -optional = false -python-versions = ">=3.8" -files = [ - {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, - {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, -] - -[package.dependencies] -certifi = ">=2017.4.17" -charset-normalizer = ">=2,<4" -idna = ">=2.5,<4" -urllib3 = ">=1.21.1,<3" - -[package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] - -[[package]] -name = "sniffio" -version = "1.3.1" -description = "Sniff out which async library your code is running under" -optional = false -python-versions = ">=3.7" -files = [ - {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, - {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, -] - -[[package]] -name = "starlette" -version = "0.36.3" -description = "The little ASGI library that shines." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "starlette-0.36.3-py3-none-any.whl", hash = "sha256:13d429aa93a61dc40bf503e8c801db1f1bca3dc706b10ef2434a36123568f044"}, - {file = "starlette-0.36.3.tar.gz", hash = "sha256:90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080"}, -] - -[package.dependencies] -anyio = ">=3.4.0,<5" - -[package.extras] -full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] - -[[package]] -name = "tqdm" -version = "4.66.5" -description = "Fast, Extensible Progress Meter" -optional = false -python-versions = ">=3.7" -files = [ - {file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"}, - {file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"}, -] - -[package.dependencies] -colorama = {version = "*", markers = "platform_system == \"Windows\""} - -[package.extras] -dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"] -notebook = ["ipywidgets (>=6)"] -slack = ["slack-sdk"] -telegram = ["requests"] - -[[package]] -name = "types-requests" -version = "2.32.0.20240712" -description = "Typing stubs for requests" -optional = false -python-versions = ">=3.8" -files = [ - {file = "types-requests-2.32.0.20240712.tar.gz", hash = "sha256:90c079ff05e549f6bf50e02e910210b98b8ff1ebdd18e19c873cd237737c1358"}, - {file = "types_requests-2.32.0.20240712-py3-none-any.whl", hash = "sha256:f754283e152c752e46e70942fa2a146b5bc70393522257bb85bd1ef7e019dcc3"}, -] - -[package.dependencies] -urllib3 = ">=2" - -[[package]] -name = "typing-extensions" -version = "4.12.2" -description = "Backported and Experimental Type Hints for Python 3.8+" -optional = false -python-versions = ">=3.8" -files = [ - {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, - {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, -] - -[[package]] -name = "urllib3" -version = "2.2.2" -description = "HTTP library with thread-safe connection pooling, file post, and more." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, - {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, -] - -[package.extras] -brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"] -h2 = ["h2 (>=4,<5)"] -socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] -zstd = ["zstandard (>=0.18.0)"] - -[metadata] -lock-version = "2.0" -python-versions = ">=3.12,<3.13" -content-hash = "6491d910d5dfd9f6210b4ee14faea993d36e9b9cbaf49cfe65f06a87b2004e9e" diff --git a/templates/agentic_rag_chatbot/python-backend/pyproject.toml b/templates/agentic_rag_chatbot/python-backend/pyproject.toml deleted file mode 100644 index 0eaed2c29..000000000 --- a/templates/agentic_rag_chatbot/python-backend/pyproject.toml +++ /dev/null @@ -1,15 +0,0 @@ -[tool.poetry] -name = "python-backend" -version = "0.1.0" -description = "" -authors = ["Your Name "] -readme = "README.md" -package-mode = false - -[tool.poetry.dependencies] -python = ">=3.12,<3.13" -r2r = "^3.0.7" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" diff --git a/templates/agentic_rag_chatbot/python-backend/r2r_ingestion.py b/templates/agentic_rag_chatbot/python-backend/r2r_ingestion.py deleted file mode 100644 index e0aca53a2..000000000 --- a/templates/agentic_rag_chatbot/python-backend/r2r_ingestion.py +++ /dev/null @@ -1,22 +0,0 @@ -import os -import time - -from r2r import R2RClient - -# Our R2R base URL is the URL of our SciPhi deployed R2R server -deployment_url = os.getenv("R2R_DEPLOYMENT_URL") -client = R2RClient(deployment_url) - -# We'll make sure that we can connect to the server -health_response = client.health() -print(health_response) - -# We'll ingest the data from the data folder -file_paths = ["../web-app/public/data"] -t0 = time.time() -ingest_response = client.ingest_files( - file_paths=file_paths, -) -t1 = time.time() -print(ingest_response) -print(f"Time taken to ingest: {t1 - t0} seconds") diff --git a/templates/agentic_rag_chatbot/web-app/.eslintrc.json b/templates/agentic_rag_chatbot/web-app/.eslintrc.json deleted file mode 100644 index 7d3ff4ebf..000000000 --- a/templates/agentic_rag_chatbot/web-app/.eslintrc.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "ignorePatterns": ["public/data/**"], - "extends": ["next", "next/core-web-vitals"] -} diff --git a/templates/agentic_rag_chatbot/web-app/.gitignore b/templates/agentic_rag_chatbot/web-app/.gitignore deleted file mode 100644 index fd3dbb571..000000000 --- a/templates/agentic_rag_chatbot/web-app/.gitignore +++ /dev/null @@ -1,36 +0,0 @@ -# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
- -# dependencies -/node_modules -/.pnp -.pnp.js -.yarn/install-state.gz - -# testing -/coverage - -# next.js -/.next/ -/out/ - -# production -/build - -# misc -.DS_Store -*.pem - -# debug -npm-debug.log* -yarn-debug.log* -yarn-error.log* - -# local env files -.env*.local - -# vercel -.vercel - -# typescript -*.tsbuildinfo -next-env.d.ts diff --git a/templates/agentic_rag_chatbot/web-app/.prettierignore b/templates/agentic_rag_chatbot/web-app/.prettierignore deleted file mode 100644 index b8ec90b05..000000000 --- a/templates/agentic_rag_chatbot/web-app/.prettierignore +++ /dev/null @@ -1 +0,0 @@ -public/data/** diff --git a/templates/agentic_rag_chatbot/web-app/.prettierrc b/templates/agentic_rag_chatbot/web-app/.prettierrc deleted file mode 100644 index 92f97e756..000000000 --- a/templates/agentic_rag_chatbot/web-app/.prettierrc +++ /dev/null @@ -1,6 +0,0 @@ -{ - "semi": true, - "singleQuote": true, - "tabWidth": 2, - "trailingComma": "es5" -} diff --git a/templates/agentic_rag_chatbot/web-app/README.md b/templates/agentic_rag_chatbot/web-app/README.md deleted file mode 100644 index a75ac5248..000000000 --- a/templates/agentic_rag_chatbot/web-app/README.md +++ /dev/null @@ -1,40 +0,0 @@ -This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app). - -## Getting Started - -First, run the development server: - -```bash -npm run dev -# or -yarn dev -# or -pnpm dev -# or -bun dev -``` - -Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. - -You can start editing the page by modifying `pages/index.tsx`. The page auto-updates as you edit the file. - -[API routes](https://nextjs.org/docs/api-routes/introduction) can be accessed on [http://localhost:3000/api/hello](http://localhost:3000/api/hello). This endpoint can be edited in `pages/api/hello.ts`. - -The `pages/api` directory is mapped to `/api/*`. Files in this directory are treated as [API routes](https://nextjs.org/docs/api-routes/introduction) instead of React pages. - -This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font. - -## Learn More - -To learn more about Next.js, take a look at the following resources: - -- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. -- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. - -You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js/) - your feedback and contributions are welcome! - -## Deploy on Vercel - -The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. - -Check out our [Next.js deployment documentation](https://nextjs.org/docs/deployment) for more details. 
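For readers unfamiliar with the `pages/api` convention the deleted README above refers to: in the Next.js pages router, any file under `pages/api` exports a default handler and is served as an API endpoint rather than a page. A minimal sketch of such a handler (the response payload is illustrative and not taken from this template; only the `pages/api/hello.ts` path comes from the README):

```ts
// pages/api/hello.ts — hypothetical example of a pages-router API route
import type { NextApiRequest, NextApiResponse } from 'next';

export default function handler(req: NextApiRequest, res: NextApiResponse) {
  // Responds with a small JSON payload; real handlers would branch on req.method
  res.status(200).json({ message: 'hello from the template backend' });
}
```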
diff --git a/templates/agentic_rag_chatbot/web-app/components/Answer.tsx b/templates/agentic_rag_chatbot/web-app/components/Answer.tsx deleted file mode 100644 index aab38f3b7..000000000 --- a/templates/agentic_rag_chatbot/web-app/components/Answer.tsx +++ /dev/null @@ -1,267 +0,0 @@ -import React, { useState, useEffect } from 'react'; -import { FC } from 'react'; -import Markdown from 'react-markdown'; - -import { - Popover, - PopoverContent, - PopoverTrigger, -} from '@/components/ui/popover'; -import { Skeleton } from '@/components/ui/skeleton'; -import { Logo } from '@/components/Logo'; -import { - Accordion, - AccordionContent, - AccordionItem, - AccordionTrigger, -} from '@/components/ui/accordion'; - -interface Message { - role: 'system' | 'user' | 'assistant'; - content: string; - id?: string; - timestamp?: number; - isStreaming?: boolean; - sources?: string | null; - searchPerformed?: boolean; -} - -interface Source { - id: string; - score: number; - metadata: { - title?: string; - text?: string; - documentid?: string; - snippet?: string; - }; -} - -const AnimatedEllipsis: FC = () => { - const [dots, setDots] = useState(''); - - useEffect(() => { - const interval = setInterval(() => { - setDots((prevDots) => (prevDots.length >= 3 ? '' : prevDots + '.')); - }, 200); - - return () => clearInterval(interval); - }, []); - - return {dots}; -}; - -const SourceItem: FC<{ source: Source }> = ({ source }) => { - const { score, metadata } = source; - - return ( -
-    <div>
-      {/* markup reconstructed; the original element tags and class names were stripped in extraction */}
-      <div>
-        {metadata.title || 'Untitled'} (Similarity: {score.toFixed(3)})
-      </div>
-      <div>
-        {metadata.text || 'No content available'}
-      </div>
-    </div>
- ); -}; - -function formatMarkdownNewLines(markdown: string): string { - return markdown - .replace(/\[(\d+)]/g, '[$1]($1)') - .split(`"queries":`)[0] - .replace(/\\u[\dA-F]{4}/gi, (match: string) => - String.fromCharCode(parseInt(match.replace(/\\u/g, ''), 16)) - ); -} - -const parseSources = (sources: string | object): Source[] => { - if (typeof sources === 'string') { - try { - const individualSources = sources - .split(',"{"') - .map((source, index) => (index === 0 ? source : `{"${source}`)); - const jsonArrayString = `[${individualSources.join(',')}]`; - const partialParsedSources = JSON.parse(jsonArrayString); - return partialParsedSources.map((source: any) => JSON.parse(source)); - } catch (error) { - console.error('Failed to parse sources:', error); - return []; - } - } - return sources as Source[]; -}; - -export const Answer: FC<{ - message: Message; - isStreaming: boolean; - isSearching: boolean; -}> = ({ message, isStreaming, isSearching }) => { - const [isOpen, setIsOpen] = useState(false); - const [parsedSources, setParsedSources] = useState([]); - - useEffect(() => { - if (message.sources) { - try { - const parsed = parseSources(message.sources); - setParsedSources(parsed); - } catch (error) { - console.error('Failed to parse sources:', error); - setParsedSources([]); - } - } else { - setParsedSources([]); - } - }, [message.sources]); - - const renderContent = () => { - const paragraphs = message.content.split('\n\n'); - return paragraphs.map((paragraph, index) => ( -

-      <Markdown
-        key={index}
-        components={{
-          // renderers reconstructed from surviving fragments; original class names omitted
-          h1: (props) => <h1 {...props} />,
-          h2: (props) => <h2 {...props} />,
-          h3: (props) => <h3 {...props} />,
-          h4: (props) => <h4 {...props} />,
-          h5: (props) => <h5 {...props} />,
-          h6: (props) => <h6 {...props} />,
-          strong: (props) => (
-            <strong {...props} />
-          ),
-          p: ({ children }) => (
-            <p>
-              {children}
-              {isStreaming && index === paragraphs.length - 1 && (
-                <AnimatedEllipsis />
-              )}
-            </p>
-          ),
-          li: ({ children }) => <li>  • {children}</li>,
-          blockquote: (props) => (
-            <blockquote {...props} />
-          ),
-          em: (props) => <em {...props} />,
-          code: (props) => <code {...props} />,
-          pre: (props) => <pre {...props} />,
    -          a: ({ href, ...props }) => {
    -            if (!href) return null;
    -            const source = parsedSources[+href - 1];
    -            if (!source) return null;
    -            const metadata = source.metadata;
    -            return (
-              <span>
-                {/* citation popover reconstructed; original props and class names omitted */}
-                <Popover>
-                  <PopoverTrigger>
-                    <span>
-                      {href}
-                    </span>
-                  </PopoverTrigger>
-                  <PopoverContent>
-                    <div>
-                      <div>
-                        {metadata.title ? `Title: ${metadata.title}` : ''}
-                        {metadata?.documentid
-                          ? `, DocumentId: ${metadata.documentid.slice(0, 8)}`
-                          : ''}
-                      </div>
-                      <div>
-                        {metadata?.snippet ?? ''}
-                      </div>
-                      <div>
-                        {metadata?.text ?? ''}
-                      </div>
-                    </div>
-                  </PopoverContent>
-                </Popover>
-              </span>
    - ); - }, - }} - > - {formatMarkdownNewLines(paragraph)} - - )); - }; - - return ( -
-      {parsedSources.length > 0 && (
-        // sources accordion reconstructed from surviving fragments; props are approximate
-        <Accordion type="single" collapsible onValueChange={(value) => setIsOpen(value === 'answer')}>
-          <AccordionItem value="answer">
-            <AccordionTrigger>
-              {isSearching ? (
-                <span>Searching over sources...</span>
-              ) : (
-                `View ${parsedSources.length} Sources`
-              )}
-            </AccordionTrigger>
-            <AccordionContent>
-              {parsedSources.map((item: Source) => (
-                <SourceItem key={item.id} source={item} />
-              ))}
-            </AccordionContent>
-          </AccordionItem>
-        </Accordion>
    - )} - - {message.searchPerformed && parsedSources.length === 0 && ( -
-        <div>No sources found</div>
-      )}
-      {!message.searchPerformed && (
-        <div>
-          {/* pre-search state reconstructed: shows the logo */}
-          <Logo />
-        </div>
-      )}
-      <div>
-        {message.content || isStreaming ? (
-          <div>
-            {message.content ? (
-              renderContent()
-            ) : (
-              <Skeleton />
-            )}
-          </div>
-        ) : (
-          <div>
-            {/* loading skeletons reconstructed */}
-            <Skeleton />
-            <Skeleton />
-            <Skeleton />
-          </div>
-        )}
-      </div>
    -
    - ); -}; diff --git a/templates/agentic_rag_chatbot/web-app/components/ChatWindow.tsx b/templates/agentic_rag_chatbot/web-app/components/ChatWindow.tsx deleted file mode 100644 index 63975f781..000000000 --- a/templates/agentic_rag_chatbot/web-app/components/ChatWindow.tsx +++ /dev/null @@ -1,260 +0,0 @@ -import React, { FC, useEffect, useState, useRef, useCallback } from 'react'; -import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert'; -import { Info, X } from 'lucide-react'; - -import MessageBubble from '@/components/MessageBubble'; -import { Answer } from '@/components/Answer'; -import { DefaultQueries } from '@/components/DefaultQueries'; - -const FUNCTION_START_TOKEN = ''; -const FUNCTION_END_TOKEN = ''; -const LLM_START_TOKEN = ''; -const LLM_END_TOKEN = ''; - -interface Message { - role: 'system' | 'user' | 'assistant'; - content: string; - id: string; - timestamp: number; - isStreaming?: boolean; - sources?: string | null; - searchPerformed?: boolean; -} - -export const ChatWindow: FC<{ - query: string; - setQuery: (query: string) => void; - agentUrl: string; - messages: any[]; - setMessages: React.Dispatch>; - isStreaming: boolean; - setIsStreaming: React.Dispatch>; -}> = ({ - query, - setQuery, - agentUrl, - messages, - setMessages, - isStreaming, - setIsStreaming, -}) => { - const [isSearching, setIsSearching] = useState(false); - const [error, setError] = useState(null); - const messagesEndRef = useRef(null); - const [showInfoAlert, setShowInfoAlert] = useState(true); - - const scrollToBottom = useCallback(() => { - messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); - }, []); - - useEffect(() => { - if (messages.length > 0) { - scrollToBottom(); - } - }, [messages, scrollToBottom]); - - const updateLastMessage = useCallback( - ( - content?: string, - sources?: string, - isStreaming?: boolean, - searchPerformed?: boolean - ) => { - setMessages((prevMessages) => { - const updatedMessages = [...prevMessages]; - const lastMessage = updatedMessages[updatedMessages.length - 1]; - if (lastMessage.role === 'assistant') { - return [ - ...updatedMessages.slice(0, -1), - { - ...lastMessage, - ...(content !== undefined && { content }), - ...(sources !== undefined && { sources }), - ...(isStreaming !== undefined && { isStreaming }), - ...(searchPerformed !== undefined && { searchPerformed }), - }, - ]; - } - return updatedMessages; - }); - }, - [setMessages] - ); - - useEffect(() => { - if (!query || isStreaming) { - return; - } - - setShowInfoAlert(false); - - const parseStreaming = async () => { - setIsStreaming(true); - setIsSearching(true); - setError(null); - - const newUserMessage: Message = { - role: 'user', - content: query, - id: Date.now().toString(), - timestamp: Date.now(), - }; - - const newAssistantMessage: Message = { - role: 'assistant', - content: '', - id: (Date.now() + 1).toString(), - timestamp: Date.now() + 1, - isStreaming: true, - sources: null, - searchPerformed: false, - }; - - setMessages((prevMessages) => [ - ...prevMessages, - newUserMessage, - newAssistantMessage, - ]); - - let buffer = ''; - let inLLMResponse = false; - let fullContent = ''; - - try { - const response = await fetch('/api/agent', { - method: 'POST', - headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ - messages: [...messages, newUserMessage], - apiUrl: agentUrl, - use_vector_search: true, - filters: {}, - search_limit: 10, - do_hybrid_search: false, - use_kg_search: false, - rag_generation_config: { - stream: true, - }, - }), - }); - 
- const reader = response.body!.getReader(); - const decoder = new TextDecoder(); - - while (true) { - const { done, value } = await reader.read(); - if (done) { - break; - } - - buffer += decoder.decode(value, { stream: true }); - - if (buffer.includes(FUNCTION_END_TOKEN)) { - const [results, rest] = buffer.split(FUNCTION_END_TOKEN); - const sourcesContent = results - .replace(FUNCTION_START_TOKEN, '') - .replace(/^[\s\S]*?([\s\S]*)<\/results>[\s\S]*$/, '$1'); - updateLastMessage(undefined, sourcesContent, undefined, true); - buffer = rest || ''; - setIsSearching(false); - } - - if (buffer.includes(LLM_START_TOKEN)) { - inLLMResponse = true; - buffer = buffer.split(LLM_START_TOKEN)[1] || ''; - } - - if (inLLMResponse) { - const endTokenIndex = buffer.indexOf(LLM_END_TOKEN); - let chunk = ''; - - if (endTokenIndex !== -1) { - chunk = buffer.slice(0, endTokenIndex); - buffer = buffer.slice(endTokenIndex + LLM_END_TOKEN.length); - inLLMResponse = false; - } else { - chunk = buffer; - buffer = ''; - } - - fullContent += chunk; - updateLastMessage(fullContent, undefined, true); - } - } - } catch (err: unknown) { - console.error('Error in streaming:', err); - setError(err instanceof Error ? err.message : String(err)); - } finally { - setIsStreaming(false); - setIsSearching(false); - updateLastMessage(fullContent, undefined, false); - setQuery(''); - } - }; - - parseStreaming(); - }, [ - query, - agentUrl, - setMessages, - setIsStreaming, - messages, - updateLastMessage, - isStreaming, - setQuery, - ]); - - return ( -
    - {showInfoAlert && ( - -
    - - - You're testing out an R2R Template — - - Deploy it for yourself in just 5 minutes! - - -
    - - Using RAG in your production applications is easy with SciPhi! -

    - Here, we've connected to a SciPhi hosted R2R server and added - some sample documents about retrieval augmented generation (RAG). - Just like that, we're ready to go! -
    - -
    - )} -
    - {messages.map((message, index) => ( - - {message.role === 'user' ? ( - - ) : ( - - )} - - ))} -
    -
    - {error &&
    Error: {error}
    } - {messages.length === 0 && } -
    - ); -}; diff --git a/templates/agentic_rag_chatbot/web-app/components/DefaultQueries.tsx b/templates/agentic_rag_chatbot/web-app/components/DefaultQueries.tsx deleted file mode 100644 index 01b95870a..000000000 --- a/templates/agentic_rag_chatbot/web-app/components/DefaultQueries.tsx +++ /dev/null @@ -1,52 +0,0 @@ -import { Lightbulb, FlaskConical, Flame, Earth } from 'lucide-react'; -import { FC } from 'react'; - -import { Logo } from '@/components/Logo'; -import { Alert, AlertDescription } from '@/components/ui/alert'; - -interface DefaultQueriesProps { - setQuery: (query: string) => void; -} - -export const DefaultQueries: FC = ({ setQuery }) => { - const defaultQueries = [ - { - query: 'What is RAG?', - icon: , - }, - { - query: 'How can RAG be used inside of my company?', - icon: , - }, - { - query: 'What is R2R?', - icon: , - }, - { - query: 'What makes R2R different from other solutions?', - icon: , - }, - ]; - - return ( -
    - -
    - {defaultQueries.map(({ query, icon }, index) => ( - = 2 ? 'hidden sm:flex' : '' - }`} - onClick={() => setQuery(query)} - > -
    {icon}
    - - {query} - -
    - ))} -
    -
    - ); -}; diff --git a/templates/agentic_rag_chatbot/web-app/components/DocumentCard.tsx b/templates/agentic_rag_chatbot/web-app/components/DocumentCard.tsx deleted file mode 100644 index bf5df15f5..000000000 --- a/templates/agentic_rag_chatbot/web-app/components/DocumentCard.tsx +++ /dev/null @@ -1,59 +0,0 @@ -import React, { useState } from 'react'; -import Modal from './Modal'; - -interface Document { - id: number; - name: string; - type: string; - url: string; -} - -interface DocumentCardProps { - document: Document; -} - -const DocumentCard: React.FC = ({ document }) => { - const [isPreviewOpen, setIsPreviewOpen] = useState(false); - - const renderPreview = () => { - switch (document.type) { - case 'pdf': - case 'txt': - case 'md': - case 'html': - return