diff --git a/search_github_files/README.md b/search_github_files/README.md index 8ea65a8..d74e571 100644 --- a/search_github_files/README.md +++ b/search_github_files/README.md @@ -1,6 +1,6 @@ # Quickstart: Searching GitHub files with Spice -## Prerequistes +## Prerequisites - Ensure you have the Spice CLI installed. Follow the [Getting Started](https://docs.spiceai.org/getting-started) if you haven't done so. - Populate `.env`. - `GITHUB_TOKEN`: With a [personal access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#creating-a-personal-access-token-classic). @@ -34,7 +34,7 @@ Result: ## Utilizing Vector-Based Search -1. In the `spicepod.yaml`, uncomment the `datasets[0].embeddings`. +1. In the `spicepod.yaml`, uncomment the `datasets[0].columns[0].embeddings`. 2. Restart the spiced. 3. Perform a basic search ```shell @@ -82,7 +82,7 @@ Result: ### Additional Configuration - Chunking -1. Update the spicepod `datasets[0].embeddings.chunking.enabled: true`. +1. Update the spicepod `datasets[0].columns[0].embeddings.chunking.enabled: true`. 2. Restart the spiced. 3. 
Rerun the search ```shell diff --git a/search_github_files/spicepod.yaml b/search_github_files/spicepod.yaml index b74c62f..e26201c 100644 --- a/search_github_files/spicepod.yaml +++ b/search_github_files/spicepod.yaml @@ -11,22 +11,22 @@ models: embeddings: - name: local_embedding_model from: huggingface:huggingface.co/sentence-transformers/all-MiniLM-L6-v2 - + datasets: - from: github:github.com/spiceai/spiceai/files/trunk name: spiceai.files params: github_token: ${secrets:GITHUB_TOKEN} - include: 'docs/**/*.md' + include: "docs/**/*.md" acceleration: enabled: true - # embeddings: - # - column: content - # use: local_embedding_model - # column_pk: - # - path - # chunking: - # enabled: false - # target_chunk_size: 256 - # overlap_size: 64 - # file_format: md + columns: + - name: content + # embeddings: + # - from: local_embedding_model + # row_id: path + # chunking: + # enabled: false + # target_chunk_size: 256 + # overlap_size: 64 + # file_format: md