# config.yaml

# Vector store backends, one mapping per provider.
# NOTE(review): how the application selects a backend is not visible here.
vectordb:
  chroma:
    is_persistent: true
    # Optional Chroma server settings, currently left unset:
    # chroma_server_host:
    # chroma_server_http_port:
    # chroma_server_ssl_enabled:
    # chroma_server_api_default_path:

  # No Weaviate options are configured; the value is an explicit null.
  weaviate: null

# Embedding backends, one mapping per provider.
embedding:
  instructor:
    model_name: 'hkunlp/instructor-base'
    model_kwargs:
      device: 'cuda'

  # Uses the same GGUF file name as llm.llama.model_path.
  llama:
    model_path: 'dolphin-2.2.1-mistral-7b.Q5_K_M.gguf'
    n_batch: 16
    n_ctx: 2048

  hugging_face:
    # Alternative model, disabled:
    # model_name: all-MiniLM-L6-v2
    model_name: 'sentence-transformers/all-mpnet-base-v2'
    model_kwargs:
      device: 'cpu'

# LLM backends, one mapping per provider.
llm:
  llama:
    model_path: 'dolphin-2.2.1-mistral-7b.Q5_K_M.gguf'
    n_batch: 10
    n_ctx: 4096
    # NOTE(review): -1 presumably means "offload every layer to the GPU";
    # confirm against the llama backend in use.
    n_gpu_layers: -1
    # Prompt with {context} and {question} placeholders, wrapped in
    # <|im_start|> / <|im_end|> markers. Kept as a double-quoted scalar with
    # \n escapes because several prompt lines end in a space, which a block
    # scalar would make easy to strip by accident.
    template: "<|im_start|> system \nYou're an AI assistant good at finding relevant context from documents to answer questions provided by the user.  <|im_end|>\n<|im_start|> user\nUse the following documents as context to answer the question at the end. REMEMBER to exercise source criticism as the documents are returned by a search provider that can return unrelated documents.\n\nSTART OF CONTEXT: \n{context} \n\nEND OF CONTEXT!\n\nIf you don't know the answer or are unsure, just say that you don't know, don't try to make up an answer. Don't mention the context in your answer but rather just answer the question directly. \nQuestion: {question} Let's think this step-by-step. \n<|im_end|>\n<|im_start|> assistant\n"
    # Same marker that closes each turn in the template above.
    end_separator: '<|im_end|>'
    model_kwargs:
      device: 'cuda'

  ctransformer:
    model: 'dolphin-2.2.1-mistral-7b.Q5_K_M.gguf'
    # Currently identical to llm.llama.template.
    template: "<|im_start|> system \nYou're an AI assistant good at finding relevant context from documents to answer questions provided by the user.  <|im_end|>\n<|im_start|> user\nUse the following documents as context to answer the question at the end. REMEMBER to exercise source criticism as the documents are returned by a search provider that can return unrelated documents.\n\nSTART OF CONTEXT: \n{context} \n\nEND OF CONTEXT!\n\nIf you don't know the answer or are unsure, just say that you don't know, don't try to make up an answer. Don't mention the context in your answer but rather just answer the question directly. \nQuestion: {question} Let's think this step-by-step. \n<|im_end|>\n<|im_start|> assistant\n"
    end_separator: '<|im_end|>'
    config:
      # NOTE(review): confirm that this backend accepts -1 here.
      gpu_layers: -1
    model_kwargs:
      device: 'cuda'

  hugging_face:
    model_id: 'gpt2'
    task: 'text-generation'
    pipeline_kwargs:
      config:
        max_length: 200
    # Empty prompt template.
    template: ""