diff --git a/.semversioner/next-release/patch-20241105183532211035.json b/.semversioner/next-release/patch-20241105183532211035.json new file mode 100644 index 0000000000..9fb4e55619 --- /dev/null +++ b/.semversioner/next-release/patch-20241105183532211035.json @@ -0,0 +1,4 @@ +{ + "type": "patch", + "description": "Fix init defaults for vector store and drift img in docs" +} diff --git a/docs/query/drift_search.md b/docs/query/drift_search.md index f9bca9cfd5..33cae7cfbc 100644 --- a/docs/query/drift_search.md +++ b/docs/query/drift_search.md @@ -9,8 +9,10 @@ DRIFT search (Dynamic Reasoning and Inference with Flexible Traversal) builds up ## Methodology

-Figure 1. An entire DRIFT search hierarchy highlighting the three core phases of the DRIFT search process. A (Primer): DRIFT compares the user’s query with the top K most semantically relevant community reports, generating a broad initial answer and follow-up questions to steer further exploration. B (Follow-Up): DRIFT uses local search to refine queries, producing additional intermediate answers and follow-up questions that enhance specificity, guiding the engine towards context-rich information. A glyph on each node in the diagram shows the confidence the algorithm has to continue the query expansion step.  C (Output Hierarchy): The final output is a hierarchical structure of questions and answers ranked by relevance, reflecting a balanced mix of global insights and local refinements, making the results adaptable and comprehensive. +Figure 1. An entire DRIFT search hierarchy highlighting the three core phases of the DRIFT search process.

+

+Figure 1. An entire DRIFT search hierarchy highlighting the three core phases of the DRIFT search process. A (Primer): DRIFT compares the user’s query with the top K most semantically relevant community reports, generating a broad initial answer and follow-up questions to steer further exploration. B (Follow-Up): DRIFT uses local search to refine queries, producing additional intermediate answers and follow-up questions that enhance specificity, guiding the engine towards context-rich information. A glyph on each node in the diagram shows how confident the algorithm is in continuing the query expansion step. C (Output Hierarchy): The final output is a hierarchical structure of questions and answers ranked by relevance, reflecting a balanced mix of global insights and local refinements, making the results adaptable and comprehensive.

DRIFT Search introduces a new approach to local search queries by including community information in the search process. This greatly expands the breadth of the query’s starting point and leads to retrieval and usage of a far higher variety of facts in the final answer. This addition expands the GraphRAG query engine by providing a more comprehensive option for local search, which uses community insights to refine a query into detailed follow-up questions. diff --git a/graphrag/config/defaults.py b/graphrag/config/defaults.py index 7b1107ec00..be33a1e5eb 100644 --- a/graphrag/config/defaults.py +++ b/graphrag/config/defaults.py @@ -93,7 +93,7 @@ VECTOR_STORE = f""" type: {VectorStoreType.LanceDB.value} db_uri: '{(Path(STORAGE_BASE_DIR) / "lancedb")!s}' - collection_name: default + container_name: default # A prefix for the vector store to create embedding containers. Default: 'default'. overwrite: true\ """ diff --git a/graphrag/index/init_content.py b/graphrag/index/init_content.py index 869202790a..da3d19807d 100644 --- a/graphrag/index/init_content.py +++ b/graphrag/index/init_content.py @@ -49,7 +49,7 @@ # api_key: # if not set, will attempt to use managed identity. Expects the `Search Index Data Contributor` RBAC role in this case. # audience: # if using managed identity, the audience to use for the token # overwrite: true # or false. Only applicable at index creation time - # collection_name: # the name of the collection to use. Default: 'default' + # container_name: default # A prefix for the AzureAISearch to create indexes. Default: 'default'. 
llm: api_key: ${{GRAPHRAG_API_KEY}} type: {defs.EMBEDDING_TYPE.value} # or azure_openai_embedding diff --git a/pyproject.toml b/pyproject.toml index 1be78f1454..c411a191f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -139,6 +139,7 @@ test_smoke = "pytest ./tests/smoke" test_notebook = "pytest ./tests/notebook" test_verbs = "pytest ./tests/verbs" index = "python -m graphrag index" +init = "python -m graphrag init" query = "python -m graphrag query" prompt_tune = "python -m graphrag prompt-tune" # Pass in a test pattern